How to use the cooltools.lib.numutils.set_diag function in cooltools

To help you get started, we’ve selected a few cooltools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github calico / basenji / bin / akita_data_read.py View on Github external
print("WARNING: %s >50% bins filtered, check:  %s. " % (genome_hic_file, mseq_str))

      # set blacklist to NaNs
      if mseq.chr in black_chr_trees:
        for black_interval in black_chr_trees[mseq.chr][mseq.start:mseq.end]:
          # adjust for sequence indexes
          black_seq_start = (black_interval.begin - mseq.start)// options.pool_width
          black_seq_end =   int(  np.ceil( (black_interval.end - mseq.start)/ options.pool_width ) )
          seq_hic_raw[:,black_seq_start:black_seq_end] = np.nan
          seq_hic_raw[black_seq_start:black_seq_end,:] = np.nan
        seq_hic_nan = np.isnan(seq_hic_raw)

      # clip first diagonals and high values
      clipval = np.nanmedian(np.diag(seq_hic_raw,options.diagonal_offset))
      for i in range(-options.diagonal_offset+1,options.diagonal_offset):
        set_diag(seq_hic_raw, clipval, i)
      seq_hic_raw = np.clip(seq_hic_raw, 0, clipval)
      seq_hic_raw[seq_hic_nan] = np.nan

      # adaptively coarsegrain based on raw counts
      seq_hic_smoothed = adaptive_coarsegrain(
                              seq_hic_raw,
                              genome_hic_cool.matrix(balance=False).fetch(mseq_str),
                              cutoff=2, max_levels=8)
      seq_hic_nan = np.isnan(seq_hic_smoothed)
      #todo: pass an option to add a certain pseudocount value, or the minimum nonzero value

      if options.as_obsexp:
        # compute obs/exp        
        if options.global_obsexp: # compute global obs/exp
          exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
          if len(exp_chr) ==0:
github calico / basenji / bin / akita_data_read.py View on Github external
cutoff=2, max_levels=8)
      seq_hic_nan = np.isnan(seq_hic_smoothed)
      #todo: pass an option to add a certain pseudocount value, or the minimum nonzero value

      if options.as_obsexp:
        # compute obs/exp        
        if options.global_obsexp: # compute global obs/exp
          exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
          if len(exp_chr) ==0: 
              raise ValueError('no expected values found for chr:'+mseq.chr)
          exp_map= np.zeros((seq_len_pool,seq_len_pool))
          for i in range(seq_len_pool):
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
          seq_hic_obsexp = seq_hic_smoothed / exp_map
          for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
          seq_hic_obsexp[seq_hic_nan] = np.nan          

        else: # compute local obs/exp
          seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]

        # log
        if options.no_log==False:
          seq_hic_obsexp = np.log(seq_hic_obsexp)
          seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
        else:
          seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 1,i)
github calico / basenji / bin / akita_data_read.py View on Github external
for i in range(seq_len_pool):
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
          seq_hic_obsexp = seq_hic_smoothed / exp_map
          for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
          seq_hic_obsexp[seq_hic_nan] = np.nan          

        else: # compute local obs/exp
          seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]

        # log
        if options.no_log==False:
          seq_hic_obsexp = np.log(seq_hic_obsexp)
          seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
        else:
          seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 1,i)

        # apply kernel
        if kernel is not None:
          seq_hic = convolve(seq_hic_obsexp, kernel)
        else:
          seq_hic = seq_hic_obsexp

      else:
        # interpolate all missing bins
        seq_hic_interpolated = interp_nan(seq_hic_smoothed)

        # rescale, reclip
github calico / basenji / bin / akita_data_read.py View on Github external
seq_hic_smoothed = adaptive_coarsegrain(
                              seq_hic_raw,
                              genome_hic_cool.matrix(balance=False).fetch(mseq_str),
                              cutoff=2, max_levels=8)
      seq_hic_nan = np.isnan(seq_hic_smoothed)
      #todo: pass an option to add a certain pseudocount value, or the minimum nonzero value

      if options.as_obsexp:
        # compute obs/exp        
        if options.global_obsexp: # compute global obs/exp
          exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
          if len(exp_chr) ==0: 
              raise ValueError('no expected values found for chr:'+mseq.chr)
          exp_map= np.zeros((seq_len_pool,seq_len_pool))
          for i in range(seq_len_pool):
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
          seq_hic_obsexp = seq_hic_smoothed / exp_map
          for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
          seq_hic_obsexp[seq_hic_nan] = np.nan          

        else: # compute local obs/exp
          seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]

        # log
        if options.no_log==False:
          seq_hic_obsexp = np.log(seq_hic_obsexp)
          seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
        else:
          seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
github calico / basenji / bin / akita_data_read.py View on Github external
for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
          seq_hic_obsexp[seq_hic_nan] = np.nan          

        else: # compute local obs/exp
          seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]

        # log
        if options.no_log==False:
          seq_hic_obsexp = np.log(seq_hic_obsexp)
          seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
        else:
          seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 1,i)

        # apply kernel
        if kernel is not None:
          seq_hic = convolve(seq_hic_obsexp, kernel)
        else:
          seq_hic = seq_hic_obsexp

      else:
        # interpolate all missing bins
        seq_hic_interpolated = interp_nan(seq_hic_smoothed)

        # rescale, reclip
        seq_hic = 100000*seq_hic_interpolated
        clipval = np.nanmedian(np.diag(seq_hic,options.diagonal_offset))
        for i in range(-options.diagonal_offset+1, options.diagonal_offset):
          set_diag(seq_hic,clipval,i)
github calico / basenji / bin / akita_data_read.py View on Github external
# apply kernel
        if kernel is not None:
          seq_hic = convolve(seq_hic_obsexp, kernel)
        else:
          seq_hic = seq_hic_obsexp

      else:
        # interpolate all missing bins
        seq_hic_interpolated = interp_nan(seq_hic_smoothed)

        # rescale, reclip
        seq_hic = 100000*seq_hic_interpolated
        clipval = np.nanmedian(np.diag(seq_hic,options.diagonal_offset))
        for i in range(-options.diagonal_offset+1, options.diagonal_offset):
          set_diag(seq_hic,clipval,i)
        seq_hic = np.clip(seq_hic, 0, clipval)

        #extra smoothing. todo pass kernel specs
        if kernel is not None:
          seq_hic = convolve(seq_hic, kernel)

    except ValueError:
      print("WARNING: %s doesn't see %s. Setting to all zeros." % (genome_hic_file, mseq_str))
      seq_hic = np.zeros((seq_len_pool,seq_len_pool), dtype='float16')

    # crop
    if options.crop_bp > 0:
      seq_hic = seq_hic[crop_start:crop_end,:]
      seq_hic = seq_hic[:,crop_start:crop_end]

    # unroll upper triangular
github calico / basenji / bin / akita_data_read.py View on Github external
seq_hic_raw,
                              genome_hic_cool.matrix(balance=False).fetch(mseq_str),
                              cutoff=2, max_levels=8)
      seq_hic_nan = np.isnan(seq_hic_smoothed)
      #todo: pass an option to add a certain pseudocount value, or the minimum nonzero value

      if options.as_obsexp:
        # compute obs/exp        
        if options.global_obsexp: # compute global obs/exp
          exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
          if len(exp_chr) ==0: 
              raise ValueError('no expected values found for chr:'+mseq.chr)
          exp_map= np.zeros((seq_len_pool,seq_len_pool))
          for i in range(seq_len_pool):
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
            set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
          seq_hic_obsexp = seq_hic_smoothed / exp_map
          for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
          seq_hic_obsexp[seq_hic_nan] = np.nan          

        else: # compute local obs/exp
          seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]

        # log
        if options.no_log==False:
          seq_hic_obsexp = np.log(seq_hic_obsexp)
          seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)
          for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
        else:
          seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
          seq_hic_obsexp = interp_nan(seq_hic_obsexp)