How to use cooltools - 10 common examples

To help you get started, we’ve selected a few cooltools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github calico / basenji / bin / akita_data_read.py View on Github external
print("WARNING: %s >50% bins filtered, check:  %s. " % (genome_hic_file, mseq_str))

      # set blacklist to NaNs
      if mseq.chr in black_chr_trees:
        for black_interval in black_chr_trees[mseq.chr][mseq.start:mseq.end]:
          # adjust for sequence indexes
          black_seq_start = (black_interval.begin - mseq.start)// options.pool_width
          black_seq_end =   int(  np.ceil( (black_interval.end - mseq.start)/ options.pool_width ) )
          seq_hic_raw[:,black_seq_start:black_seq_end] = np.nan
          seq_hic_raw[black_seq_start:black_seq_end,:] = np.nan
        seq_hic_nan = np.isnan(seq_hic_raw)

      # clip first diagonals and high values
      clipval = np.nanmedian(np.diag(seq_hic_raw,options.diagonal_offset))
      for i in range(-options.diagonal_offset+1,options.diagonal_offset):
        set_diag(seq_hic_raw, clipval, i)
      seq_hic_raw = np.clip(seq_hic_raw, 0, clipval)
      seq_hic_raw[seq_hic_nan] = np.nan

      # adaptively coarsegrain based on raw counts
      seq_hic_smoothed = adaptive_coarsegrain(
                              seq_hic_raw,
                              genome_hic_cool.matrix(balance=False).fetch(mseq_str),
                              cutoff=2, max_levels=8)
      seq_hic_nan = np.isnan(seq_hic_smoothed)
      #todo: pass an option to add a certain pseudocount value, or the minimum nonzero value

      if options.as_obsexp:
        # compute obs/exp        
        if options.global_obsexp: # compute global obs/exp
          exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
          if len(exp_chr) ==0:
github calico / basenji / bin / akita_data_read.py View on Github external
# adjust for sequence indexes
          black_seq_start = (black_interval.begin - mseq.start)// options.pool_width
          black_seq_end =   int(  np.ceil( (black_interval.end - mseq.start)/ options.pool_width ) )
          seq_hic_raw[:,black_seq_start:black_seq_end] = np.nan
          seq_hic_raw[black_seq_start:black_seq_end,:] = np.nan
        seq_hic_nan = np.isnan(seq_hic_raw)

      # clip first diagonals and high values
      clipval = np.nanmedian(np.diag(seq_hic_raw,options.diagonal_offset))
      for i in range(-options.diagonal_offset+1,options.diagonal_offset):
        set_diag(seq_hic_raw, clipval, i)
      seq_hic_raw = np.clip(seq_hic_raw, 0, clipval)
      seq_hic_raw[seq_hic_nan] = np.nan

      # adaptively coarsegrain based on raw counts
      seq_hic_smoothed = adaptive_coarsegrain(
                              seq_hic_raw,
                              genome_hic_cool.matrix(balance=False).fetch(mseq_str),
                              cutoff=2, max_levels=8)
      seq_hic_nan = np.isnan(seq_hic_smoothed)
      #todo: pass an option to add a certain pseudocount value, or the minimum nonzero value

      if options.as_obsexp:
        # compute obs/exp        
        if options.global_obsexp: # compute global obs/exp
          exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
          if len(exp_chr) ==0: 
              raise ValueError('no expected values found for chr:'+mseq.chr)
          exp_map= np.zeros((seq_len_pool,seq_len_pool))
          for i in range(seq_len_pool):
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
github calico / basenji / bin / basenji_data_hic_read.py View on Github external
for black_interval in black_chr_trees[mseq.chr][mseq.start:mseq.end]:
          # adjust for sequence indexes
          black_seq_start = (black_interval.begin - mseq.start)// options.pool_width
          black_seq_end =   int(  np.ceil( (black_interval.end - mseq.start)/ options.pool_width ) )
          seq_hic_raw[:,black_seq_start:black_seq_end] = np.nan
          seq_hic_raw[black_seq_start:black_seq_end,:] = np.nan
        seq_hic_nan = np.isnan(seq_hic_raw)

      # clip first diagonals and high values
      clipval = np.nanmedian(np.diag(seq_hic_raw,2))
      for i in [-1,0,1]: set_diag(seq_hic_raw,clipval,i)
      seq_hic_raw = np.clip(seq_hic_raw, 0, seq_hic_raw)
      seq_hic_raw[seq_hic_nan] = np.nan

      # adaptively coarsegrain based on raw counts
      seq_hic_smoothed =  adaptive_coarsegrain(
                              seq_hic_raw,  
                              genome_hic_cool.matrix(balance=False).fetch(mseq_str),  
                              cutoff= 2, max_levels=8)
      #todo: pass an option to add a certain pseudocount value, or the minimum nonzero value

      if options.as_obsexp == True:
        # interpolate single missing bins
        seq_hic_interpolated =  interpolate_bad_singletons(seq_hic_smoothed, mask=(~seq_hic_nan),
                                                 fillDiagonal=True, returnMask=False, secondPass=True,verbose=False)
        seq_hic_nan = np.isnan(seq_hic_interpolated)

        # compute observed/expected
        seq_hic_obsexp = observed_over_expected(seq_hic_interpolated, ~seq_hic_nan)[0]
        # todo: allow passing a global expected rather than computing locally

        # log
github Phlya / coolpuppy / coolpup.py View on Github external
newmap = numutils.zoom_array(newmap, (rescale_size,
                                                         rescale_size))
            if rot_flip:
                newmap = np.rot90(np.flipud(newmap), -1)
            elif flip_rot:
                newmap = np.flipud(np.rot90(newmap))
            mymap += np.nan_to_num(newmap)
            if unbalanced and cov_norm and expected is False:
                new_cov_start = coverage[lo_left:hi_left]
                new_cov_end = coverage[lo_right:hi_right]
                if rescale:
                    if len(new_cov_start)==0:
                        new_cov_start = np.zeros(rescale_size)
                    if len(new_cov_end)==0:
                        new_cov_end = np.zeros(rescale_size)
                    new_cov_start = numutils.zoom_array(new_cov_start,
                                                       (rescale_size,))
                    new_cov_end = numutils.zoom_array(new_cov_end,
                                                     (rescale_size,))
                else:
                    l = len(new_cov_start)
                    r = len(new_cov_end)
                    new_cov_start = np.pad(new_cov_start, (mymap.shape[0]-l, 0),
                                                       'constant')
                    new_cov_end = np.pad(new_cov_end,
                                     (0, mymap.shape[1]-r), 'constant')
                cov_start += np.nan_to_num(new_cov_start)
                cov_end += +np.nan_to_num(new_cov_end)
            n += 1
    if local:
        mymap = np.triu(mymap, 0)
        mymap += np.rot90(np.fliplr(np.triu(mymap, 1)))
github Phlya / coolpuppy / coolpup.py View on Github external
if rot_flip:
                newmap = np.rot90(np.flipud(newmap), -1)
            elif flip_rot:
                newmap = np.flipud(np.rot90(newmap))
            mymap += np.nan_to_num(newmap)
            if unbalanced and cov_norm and expected is False:
                new_cov_start = coverage[lo_left:hi_left]
                new_cov_end = coverage[lo_right:hi_right]
                if rescale:
                    if len(new_cov_start)==0:
                        new_cov_start = np.zeros(rescale_size)
                    if len(new_cov_end)==0:
                        new_cov_end = np.zeros(rescale_size)
                    new_cov_start = numutils.zoom_array(new_cov_start,
                                                       (rescale_size,))
                    new_cov_end = numutils.zoom_array(new_cov_end,
                                                     (rescale_size,))
                else:
                    l = len(new_cov_start)
                    r = len(new_cov_end)
                    new_cov_start = np.pad(new_cov_start, (mymap.shape[0]-l, 0),
                                                       'constant')
                    new_cov_end = np.pad(new_cov_end,
                                     (0, mymap.shape[1]-r), 'constant')
                cov_start += np.nan_to_num(new_cov_start)
                cov_end += +np.nan_to_num(new_cov_end)
            n += 1
    if local:
        mymap = np.triu(mymap, 0)
        mymap += np.rot90(np.fliplr(np.triu(mymap, 1)))
    return mymap, n, cov_start, cov_end
github Phlya / coolpuppy / coolpuppy / coolpup.py View on Github external
elif rot:
                newmap = np.rot90(newmap, -1)

            mymap = np.nansum([mymap, newmap], axis=0)
            if self.coverage_norm and not expected and (self.balance is False):
                new_cov_start = coverage[lo_left:hi_left]
                new_cov_end = coverage[lo_right:hi_right]
                if self.rescale:
                    if len(new_cov_start) == 0:
                        new_cov_start = np.zeros(self.rescale_size)
                    if len(new_cov_end) == 0:
                        new_cov_end = np.zeros(self.rescale_size)
                    new_cov_start = numutils.zoom_array(
                        new_cov_start, (self.rescale_size,)
                    )
                    new_cov_end = numutils.zoom_array(new_cov_end, (self.rescale_size,))
                else:
                    l = len(new_cov_start)
                    r = len(new_cov_end)
                    new_cov_start = np.pad(
                        new_cov_start, (mymap.shape[0] - l, 0), "constant"
                    )
                    new_cov_end = np.pad(
                        new_cov_end, (0, mymap.shape[1] - r), "constant"
                    )
                cov_start += np.nan_to_num(new_cov_start)
                cov_end += +np.nan_to_num(new_cov_end)
            num += np.isfinite(newmap).astype(int)
            n += 1
        return mymap, num, cov_start, cov_end, n
github Phlya / coolpuppy / coolpuppy / coolpup.py View on Github external
#                        newmap, [(y, 0), (0, x)], "constant"
            #                    )  # Padding to adjust to the right shape
            newmap = newmap.astype(float)
            if not self.local:
                ignore_indices = np.tril_indices_from(
                    newmap, diag - (stPad * 2 + 1) - 1 + self.ignore_diags
                )
                newmap[ignore_indices] = np.nan
            else:
                newmap = np.triu(newmap, self.ignore_diags)
                newmap += np.triu(newmap, 1).T
            if self.rescale:
                if newmap.size == 0 or np.all(np.isnan(newmap)):
                    newmap = np.zeros((self.rescale_size, self.rescale_size))
                else:
                    newmap = numutils.zoom_array(
                        newmap, (self.rescale_size, self.rescale_size)
                    )
            if rot_flip:
                newmap = np.rot90(np.flipud(newmap), 1)
            elif rot:
                newmap = np.rot90(newmap, -1)

            mymap = np.nansum([mymap, newmap], axis=0)
            if self.coverage_norm and not expected and (self.balance is False):
                new_cov_start = coverage[lo_left:hi_left]
                new_cov_end = coverage[lo_right:hi_right]
                if self.rescale:
                    if len(new_cov_start) == 0:
                        new_cov_start = np.zeros(self.rescale_size)
                    if len(new_cov_end) == 0:
                        new_cov_end = np.zeros(self.rescale_size)
github Phlya / coolpuppy / coolpuppy / coolpup.py View on Github external
)
            if rot_flip:
                newmap = np.rot90(np.flipud(newmap), 1)
            elif rot:
                newmap = np.rot90(newmap, -1)

            mymap = np.nansum([mymap, newmap], axis=0)
            if self.coverage_norm and not expected and (self.balance is False):
                new_cov_start = coverage[lo_left:hi_left]
                new_cov_end = coverage[lo_right:hi_right]
                if self.rescale:
                    if len(new_cov_start) == 0:
                        new_cov_start = np.zeros(self.rescale_size)
                    if len(new_cov_end) == 0:
                        new_cov_end = np.zeros(self.rescale_size)
                    new_cov_start = numutils.zoom_array(
                        new_cov_start, (self.rescale_size,)
                    )
                    new_cov_end = numutils.zoom_array(new_cov_end, (self.rescale_size,))
                else:
                    l = len(new_cov_start)
                    r = len(new_cov_end)
                    new_cov_start = np.pad(
                        new_cov_start, (mymap.shape[0] - l, 0), "constant"
                    )
                    new_cov_end = np.pad(
                        new_cov_end, (0, mymap.shape[1] - r), "constant"
                    )
                cov_start += np.nan_to_num(new_cov_start)
                cov_end += +np.nan_to_num(new_cov_end)
            num += np.isfinite(newmap).astype(int)
            n += 1
github calico / basenji / bin / akita_data_read.py View on Github external
cutoff=2, max_levels=8)
      seq_hic_nan = np.isnan(seq_hic_smoothed)
      #todo: pass an option to add a certain pseudocount value, or the minimum nonzero value

      if options.as_obsexp:
        # compute obs/exp        
        if options.global_obsexp: # compute global obs/exp
          exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
          if len(exp_chr) ==0: 
              raise ValueError('no expected values found for chr:'+mseq.chr)
          exp_map= np.zeros((seq_len_pool,seq_len_pool))
          for i in range(seq_len_pool):
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
          seq_hic_obsexp = seq_hic_smoothed / exp_map
          for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
          seq_hic_obsexp[seq_hic_nan] = np.nan          

        else: # compute local obs/exp
          seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]

        # log
        if options.no_log==False:
          seq_hic_obsexp = np.log(seq_hic_obsexp)
          seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
        else:
          seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 1,i)
github calico / basenji / bin / akita_data_read.py View on Github external
for i in range(seq_len_pool):
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
          seq_hic_obsexp = seq_hic_smoothed / exp_map
          for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
          seq_hic_obsexp[seq_hic_nan] = np.nan          

        else: # compute local obs/exp
          seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]

        # log
        if options.no_log==False:
          seq_hic_obsexp = np.log(seq_hic_obsexp)
          seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
        else:
          seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 1,i)

        # apply kernel
        if kernel is not None:
          seq_hic = convolve(seq_hic_obsexp, kernel)
        else:
          seq_hic = seq_hic_obsexp

      else:
        # interpolate all missing bins
        seq_hic_interpolated = interp_nan(seq_hic_smoothed)

        # rescale, reclip