# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Warn that more than half of the Hi-C bins for this sequence were filtered.
# Fix: a literal '%' in an old-style format string must be escaped as '%%';
# the original unescaped '>50% b...' raised
# "ValueError: unsupported format character 'b'" at runtime.
print("WARNING: %s >50%% bins filtered, check: %s. " % (genome_hic_file, mseq_str))
# set blacklist to NaNs
# NOTE(review): leading indentation appears stripped from this fragment, so the
# nesting below is implied rather than shown -- confirm against the original file.
if mseq.chr in black_chr_trees:
for black_interval in black_chr_trees[mseq.chr][mseq.start:mseq.end]:
# adjust for sequence indexes
# interval coords are genomic; convert to pooled-bin indices relative to mseq.start
black_seq_start = (black_interval.begin - mseq.start)// options.pool_width
# ceil so a bin only partially covered by the blacklist interval is still masked
black_seq_end = int( np.ceil( (black_interval.end - mseq.start)/ options.pool_width ) )
# NaN out both the column and row stripes touching the blacklisted bins
seq_hic_raw[:,black_seq_start:black_seq_end] = np.nan
seq_hic_raw[black_seq_start:black_seq_end,:] = np.nan
# remember which entries are NaN so the mask can be re-applied after clipping
seq_hic_nan = np.isnan(seq_hic_raw)
# clip first diagonals and high values
# ceiling for clipping = NaN-aware median of the first retained off-diagonal
clipval = np.nanmedian(np.diag(seq_hic_raw,options.diagonal_offset))
# overwrite the near-diagonal band (|offset| < diagonal_offset) with the clip value
for i in range(-options.diagonal_offset+1,options.diagonal_offset):
set_diag(seq_hic_raw, clipval, i)
seq_hic_raw = np.clip(seq_hic_raw, 0, clipval)
# restore NaNs clobbered by np.clip
seq_hic_raw[seq_hic_nan] = np.nan
# adaptively coarsegrain based on raw counts
# presumably cooltools' adaptive_coarsegrain (balanced matrix smoothed where raw
# counts are low) -- TODO confirm against the imports of the original file
seq_hic_smoothed = adaptive_coarsegrain(
seq_hic_raw,
genome_hic_cool.matrix(balance=False).fetch(mseq_str),
cutoff=2, max_levels=8)
# refresh the NaN mask: coarsegraining can fill or introduce NaNs
seq_hic_nan = np.isnan(seq_hic_smoothed)
#todo: pass an option to add a certain pseudocount value, or the minimum nonzero value
if options.as_obsexp:
# compute obs/exp
if options.global_obsexp: # compute global obs/exp
# per-diagonal expected values for this chromosome, truncated to the pooled length
exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
if len(exp_chr) ==0:
# NOTE(review): corruption here -- the body of this empty-expected check is a
# pasted duplicate of the earlier blacklist/clip/coarsegrain code; the intact
# variant of this branch (a few lines below) raises ValueError instead. Confirm
# against the original file before relying on this region.
# adjust for sequence indexes
black_seq_start = (black_interval.begin - mseq.start)// options.pool_width
black_seq_end = int( np.ceil( (black_interval.end - mseq.start)/ options.pool_width ) )
seq_hic_raw[:,black_seq_start:black_seq_end] = np.nan
seq_hic_raw[black_seq_start:black_seq_end,:] = np.nan
seq_hic_nan = np.isnan(seq_hic_raw)
# clip first diagonals and high values
clipval = np.nanmedian(np.diag(seq_hic_raw,options.diagonal_offset))
for i in range(-options.diagonal_offset+1,options.diagonal_offset):
set_diag(seq_hic_raw, clipval, i)
seq_hic_raw = np.clip(seq_hic_raw, 0, clipval)
seq_hic_raw[seq_hic_nan] = np.nan
# adaptively coarsegrain based on raw counts
seq_hic_smoothed = adaptive_coarsegrain(
seq_hic_raw,
genome_hic_cool.matrix(balance=False).fetch(mseq_str),
cutoff=2, max_levels=8)
seq_hic_nan = np.isnan(seq_hic_smoothed)
#todo: pass an option to add a certain pseudocount value, or the minimum nonzero value
if options.as_obsexp:
# compute obs/exp
if options.global_obsexp: # compute global obs/exp
exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
if len(exp_chr) ==0:
# fail loudly: obs/exp is meaningless without expected values for this chromosome
raise ValueError('no expected values found for chr:'+mseq.chr)
# build a dense expected matrix where entry (i,j) = expected value at |i-j|
exp_map= np.zeros((seq_len_pool,seq_len_pool))
for i in range(seq_len_pool):
set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
# Mask blacklisted intervals, clip the near-diagonal band, and adaptively
# coarsegrain this sequence's Hi-C matrix (variant of the fragment above with
# hard-coded offsets: median taken at diagonal 2, band [-1, 0, 1] overwritten).
# Indentation reconstructed from Python semantics -- the original fragment was
# whitespace-stripped.
for black_interval in black_chr_trees[mseq.chr][mseq.start:mseq.end]:
    # adjust for sequence indexes: genomic coords -> pooled-bin indices
    black_seq_start = (black_interval.begin - mseq.start) // options.pool_width
    # ceil so a partially-overlapped bin is still masked
    black_seq_end = int(np.ceil((black_interval.end - mseq.start) / options.pool_width))
    seq_hic_raw[:, black_seq_start:black_seq_end] = np.nan
    seq_hic_raw[black_seq_start:black_seq_end, :] = np.nan
# remember the NaN mask so it can be re-applied after clipping
seq_hic_nan = np.isnan(seq_hic_raw)
# clip first diagonals and high values
clipval = np.nanmedian(np.diag(seq_hic_raw, 2))
for i in [-1, 0, 1]:
    set_diag(seq_hic_raw, clipval, i)
# Fix: upper bound must be the scalar clipval; the original
# np.clip(seq_hic_raw, 0, seq_hic_raw) clipped every element to itself,
# making the upper bound a no-op (cf. the intact variants of this fragment
# elsewhere in the file, which use clipval).
seq_hic_raw = np.clip(seq_hic_raw, 0, clipval)
seq_hic_raw[seq_hic_nan] = np.nan
# adaptively coarsegrain based on raw counts
seq_hic_smoothed = adaptive_coarsegrain(
    seq_hic_raw,
    genome_hic_cool.matrix(balance=False).fetch(mseq_str),
    cutoff=2, max_levels=8)
# todo: pass an option to add a certain pseudocount value, or the minimum nonzero value
if options.as_obsexp:  # idiom: plain truth test instead of '== True'
    # interpolate single missing bins
    seq_hic_interpolated = interpolate_bad_singletons(
        seq_hic_smoothed, mask=(~seq_hic_nan),
        fillDiagonal=True, returnMask=False, secondPass=True, verbose=False)
    seq_hic_nan = np.isnan(seq_hic_interpolated)
    # compute observed/expected (locally, from this matrix alone)
    seq_hic_obsexp = observed_over_expected(seq_hic_interpolated, ~seq_hic_nan)[0]
    # todo: allow passing a global expected rather than computing locally
    # log
# NOTE(review): whitespace-stripped pileup-accumulation fragment (coolpuppy-style);
# nesting is implied, not shown. The span from the first 'if rot_flip:' through
# 'mymap += np.rot90(...)' appears verbatim TWICE below -- likely duplication
# introduced by whatever produced this file; confirm against the original.
# resample the snippet to the target pileup size
newmap = numutils.zoom_array(newmap, (rescale_size,
rescale_size))
# orient the snippet consistently before accumulating
if rot_flip:
newmap = np.rot90(np.flipud(newmap), -1)
elif flip_rot:
newmap = np.flipud(np.rot90(newmap))
# accumulate, treating NaNs as zero contribution
mymap += np.nan_to_num(newmap)
if unbalanced and cov_norm and expected is False:
# coverage tracks flanking the two anchors
new_cov_start = coverage[lo_left:hi_left]
new_cov_end = coverage[lo_right:hi_right]
if rescale:
# empty slices (window off the chromosome edge) become zero coverage
if len(new_cov_start)==0:
new_cov_start = np.zeros(rescale_size)
if len(new_cov_end)==0:
new_cov_end = np.zeros(rescale_size)
new_cov_start = numutils.zoom_array(new_cov_start,
(rescale_size,))
new_cov_end = numutils.zoom_array(new_cov_end,
(rescale_size,))
else:
# pad short edge slices to the pileup dimensions instead of rescaling
l = len(new_cov_start)
r = len(new_cov_end)
new_cov_start = np.pad(new_cov_start, (mymap.shape[0]-l, 0),
'constant')
new_cov_end = np.pad(new_cov_end,
(0, mymap.shape[1]-r), 'constant')
cov_start += np.nan_to_num(new_cov_start)
# NOTE(review): the unary '+' in '+= +' is redundant but harmless
cov_end += +np.nan_to_num(new_cov_end)
# count of snippets accumulated
n += 1
if local:
# symmetrize: keep upper triangle, mirror it below the diagonal
mymap = np.triu(mymap, 0)
mymap += np.rot90(np.fliplr(np.triu(mymap, 1)))
if rot_flip:
newmap = np.rot90(np.flipud(newmap), -1)
elif flip_rot:
newmap = np.flipud(np.rot90(newmap))
mymap += np.nan_to_num(newmap)
if unbalanced and cov_norm and expected is False:
new_cov_start = coverage[lo_left:hi_left]
new_cov_end = coverage[lo_right:hi_right]
if rescale:
if len(new_cov_start)==0:
new_cov_start = np.zeros(rescale_size)
if len(new_cov_end)==0:
new_cov_end = np.zeros(rescale_size)
new_cov_start = numutils.zoom_array(new_cov_start,
(rescale_size,))
new_cov_end = numutils.zoom_array(new_cov_end,
(rescale_size,))
else:
l = len(new_cov_start)
r = len(new_cov_end)
new_cov_start = np.pad(new_cov_start, (mymap.shape[0]-l, 0),
'constant')
new_cov_end = np.pad(new_cov_end,
(0, mymap.shape[1]-r), 'constant')
cov_start += np.nan_to_num(new_cov_start)
cov_end += +np.nan_to_num(new_cov_end)
n += 1
if local:
mymap = np.triu(mymap, 0)
mymap += np.rot90(np.fliplr(np.triu(mymap, 1)))
# accumulated pileup, snippet count, and flanking coverage sums
return mymap, n, cov_start, cov_end
# NOTE(review): a second, 'self.'-based variant of the accumulation loop;
# whitespace-stripped, and the opening 'if'/'elif' chain is cut at the top edge.
elif rot:
newmap = np.rot90(newmap, -1)
# nansum treats NaNs in either map as zero, unlike '+='
mymap = np.nansum([mymap, newmap], axis=0)
if self.coverage_norm and not expected and (self.balance is False):
new_cov_start = coverage[lo_left:hi_left]
new_cov_end = coverage[lo_right:hi_right]
if self.rescale:
# empty edge slices become zero coverage of the target size
if len(new_cov_start) == 0:
new_cov_start = np.zeros(self.rescale_size)
if len(new_cov_end) == 0:
new_cov_end = np.zeros(self.rescale_size)
new_cov_start = numutils.zoom_array(
new_cov_start, (self.rescale_size,)
)
new_cov_end = numutils.zoom_array(new_cov_end, (self.rescale_size,))
else:
# pad instead of rescale so tracks match the pileup dimensions
l = len(new_cov_start)
r = len(new_cov_end)
new_cov_start = np.pad(
new_cov_start, (mymap.shape[0] - l, 0), "constant"
)
new_cov_end = np.pad(
new_cov_end, (0, mymap.shape[1] - r), "constant"
)
cov_start += np.nan_to_num(new_cov_start)
# NOTE(review): redundant unary '+' retained from the original
cov_end += +np.nan_to_num(new_cov_end)
# per-pixel count of finite contributions, for later normalization
num += np.isfinite(newmap).astype(int)
n += 1
return mymap, num, cov_start, cov_end, n
# newmap, [(y, 0), (0, x)], "constant"
# ) # Padding to adjust to the right shape
# ensure float so NaN assignment below is valid
newmap = newmap.astype(float)
if not self.local:
# blank out diagonals below the ignore threshold for trans/distal pileups
ignore_indices = np.tril_indices_from(
newmap, diag - (stPad * 2 + 1) - 1 + self.ignore_diags
)
newmap[ignore_indices] = np.nan
else:
# local pileup: symmetrize from the upper triangle past the ignored diagonals
newmap = np.triu(newmap, self.ignore_diags)
newmap += np.triu(newmap, 1).T
if self.rescale:
# an empty or all-NaN snippet contributes zeros at the target size
if newmap.size == 0 or np.all(np.isnan(newmap)):
newmap = np.zeros((self.rescale_size, self.rescale_size))
else:
newmap = numutils.zoom_array(
newmap, (self.rescale_size, self.rescale_size)
)
# orient before accumulating
if rot_flip:
newmap = np.rot90(np.flipud(newmap), 1)
elif rot:
newmap = np.rot90(newmap, -1)
mymap = np.nansum([mymap, newmap], axis=0)
if self.coverage_norm and not expected and (self.balance is False):
new_cov_start = coverage[lo_left:hi_left]
new_cov_end = coverage[lo_right:hi_right]
# Rescale the flanking coverage tracks to the target pileup size.
# Fix: the original copy of this branch was truncated -- it ended in a stray
# ')' where the zoom_array resampling belongs; restored from the intact
# duplicate of this same branch later in the file.
if self.rescale:
    # empty slices (window off the chromosome edge) become zero coverage
    if len(new_cov_start) == 0:
        new_cov_start = np.zeros(self.rescale_size)
    if len(new_cov_end) == 0:
        new_cov_end = np.zeros(self.rescale_size)
    new_cov_start = numutils.zoom_array(
        new_cov_start, (self.rescale_size,)
    )
    new_cov_end = numutils.zoom_array(new_cov_end, (self.rescale_size,))
# NOTE(review): intact duplicate of the orient/accumulate/coverage-norm branch;
# whitespace-stripped, nesting implied.
if rot_flip:
newmap = np.rot90(np.flipud(newmap), 1)
elif rot:
newmap = np.rot90(newmap, -1)
# nansum treats NaNs in either operand as zero
mymap = np.nansum([mymap, newmap], axis=0)
if self.coverage_norm and not expected and (self.balance is False):
new_cov_start = coverage[lo_left:hi_left]
new_cov_end = coverage[lo_right:hi_right]
if self.rescale:
# empty edge slices become zero coverage of the target size
if len(new_cov_start) == 0:
new_cov_start = np.zeros(self.rescale_size)
if len(new_cov_end) == 0:
new_cov_end = np.zeros(self.rescale_size)
new_cov_start = numutils.zoom_array(
new_cov_start, (self.rescale_size,)
)
new_cov_end = numutils.zoom_array(new_cov_end, (self.rescale_size,))
else:
# pad short slices so tracks match the pileup dimensions
l = len(new_cov_start)
r = len(new_cov_end)
new_cov_start = np.pad(
new_cov_start, (mymap.shape[0] - l, 0), "constant"
)
new_cov_end = np.pad(
new_cov_end, (0, mymap.shape[1] - r), "constant"
)
cov_start += np.nan_to_num(new_cov_start)
# NOTE(review): redundant unary '+' retained from the original
cov_end += +np.nan_to_num(new_cov_end)
# per-pixel count of finite contributions
num += np.isfinite(newmap).astype(int)
n += 1
# NOTE(review): tail of an adaptive_coarsegrain call whose opening lines are cut
# at the fragment boundary; whitespace-stripped, nesting implied. The region
# from 'for i in range(seq_len_pool):' through the local-obs/exp else-branch
# appears TWICE below -- apparent duplication; confirm against the original file.
cutoff=2, max_levels=8)
seq_hic_nan = np.isnan(seq_hic_smoothed)
#todo: pass an option to add a certain pseudocount value, or the minimum nonzero value
if options.as_obsexp:
# compute obs/exp
if options.global_obsexp: # compute global obs/exp
# per-diagonal expected values for this chromosome, truncated to pooled length
exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
if len(exp_chr) ==0:
raise ValueError('no expected values found for chr:'+mseq.chr)
# dense expected matrix: entry (i,j) = expected value at distance |i-j|
exp_map= np.zeros((seq_len_pool,seq_len_pool))
for i in range(seq_len_pool):
set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
seq_hic_obsexp = seq_hic_smoothed / exp_map
# neutralize the ignored near-diagonal band (obs/exp ratio of 1.0 there)
for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
seq_hic_obsexp[seq_hic_nan] = np.nan
else: # compute local obs/exp
seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]
# log
if options.no_log==False:
# log-transform, symmetric clip, fill NaNs, zero the ignored band (log(1)=0)
seq_hic_obsexp = np.log(seq_hic_obsexp)
seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
seq_hic_obsexp = interp_nan(seq_hic_obsexp)
for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
else:
# linear scale: clip at [0, clip], fill NaNs, set the ignored band to 1 (neutral ratio)
seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
seq_hic_obsexp = interp_nan(seq_hic_obsexp)
for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 1,i)
for i in range(seq_len_pool):
set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
seq_hic_obsexp = seq_hic_smoothed / exp_map
for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
seq_hic_obsexp[seq_hic_nan] = np.nan
else: # compute local obs/exp
seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]
# log
if options.no_log==False:
seq_hic_obsexp = np.log(seq_hic_obsexp)
seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
seq_hic_obsexp = interp_nan(seq_hic_obsexp)
for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
else:
seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
seq_hic_obsexp = interp_nan(seq_hic_obsexp)
for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 1,i)
# apply kernel
# optional smoothing convolution over the obs/exp map
if kernel is not None:
seq_hic = convolve(seq_hic_obsexp, kernel)
else:
seq_hic = seq_hic_obsexp
else:
# interpolate all missing bins
seq_hic_interpolated = interp_nan(seq_hic_smoothed)
# rescale, reclip