How to use the espnet.utils.cli_writers.KaldiWriter class in espnet

To help you get started, we’ve selected a few espnet examples based on popular ways KaldiWriter is used in public projects.

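At its core, KaldiWriter is used as a context manager: open it with a Kaldi wspecifier, then assign one feature matrix per utterance id. Below is a minimal sketch with placeholder paths and a dummy feature array (the constructor arguments mirror the ones passed in the examples that follow):

import numpy as np
from espnet.utils.cli_writers import KaldiWriter

# Dummy (frames, dims) feature matrix standing in for real features.
feats = np.random.randn(100, 80).astype(np.float32)

# Write a Kaldi ark/scp pair; "feats.ark" / "feats.scp" are placeholder paths.
with KaldiWriter('ark,scp:feats.ark,feats.scp', compress=False) as writer:
    writer['utt1'] = feats  # one matrix per utterance id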

didi/delta: utils/speech/compute_pitch_feats.py (view on GitHub)
  config['resample_freq'] = args.resample_freq
  config['delta_pitch'] = args.delta_pitch
  config['nccf_ballast'] = args.nccf_ballast
  config['lowpass_filter_width'] = args.lowpass_filter_width
  config['upsample_filter_width'] = args.upsample_filter_width
  config['max_frames_latency'] = args.max_frames_latency
  config['frames_per_chunk'] = args.frames_per_chunk
  config['simulate_first_pass_online'] = args.simulate_first_pass_online
  config['recompute_frame'] = args.recompute_frame
  config['nccf_ballast_online'] = args.nccf_ballast_online

  pitch = Pitch.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier,
                          segments=args.segments) as reader, \
        KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                    compress=args.compress, compression_method=args.compression_method) as writer:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      pitch_test = tf.squeeze(pitch(audio_data, args.sample_rate))
      sess = tf.Session()
      pitch_feats = pitch_test.eval(session=sess)
      writer[utt_id] = pitch_feats

didi/delta: utils/speech/compute_plp_feats.py (view on GitHub)
def compute_plp():
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = int(args.sample_rate)
  config['plp_order'] = int(args.plp_order)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length

  plp = Plp.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier,
                          segments=args.segments) as reader, \
        KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                    compress=args.compress, compression_method=args.compression_method) as writer:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      plp_test = plp(audio_data, args.sample_rate)
      sess = tf.Session()
      plp_feats = plp_test.eval(session=sess)
      writer[utt_id] = plp_feats

didi/delta: utils/speech/compute_fbank_pitch.py (view on GitHub)
  config = {}
  config['sample_rate'] = float(args.sample_rate)
  config['upper_frequency_limit'] = float(args.upper_frequency_limit)
  config['lower_frequency_limit'] = float(args.lower_frequency_limit)
  config['filterbank_channel_count'] = float(args.filterbank_channel_count)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length
  config['thres_autoc'] = args.thres_autoc
  config['output_type'] = args.output_type

  fbank_pitch = FbankPitch.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier,
                          segments=args.segments) as reader, \
        KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                    compress=args.compress, compression_method=args.compression_method) as writer:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      fbank_pitch_test = fbank_pitch(audio_data, args.sample_rate)
      sess = tf.Session()
      fbank_pitch_feats = fbank_pitch_test.eval(session=sess)
      writer[utt_id] = fbank_pitch_feats

didi/delta: utils/speech/compute_mfcc_feats.py (view on GitHub)
  config['frame_length'] = args.frame_length
  config['output_type'] = args.output_type
  config['window_type'] = args.window_type
  config['snip_edges'] = args.snip_edges
  config['preeph_coeff'] = args.preeph_coeff
  config['remove_dc_offset'] = args.remove_dc_offset
  config['is_fbank'] = args.is_fbank
  config['cepstral_lifter'] = args.cepstral_lifter
  config['coefficient_count'] = args.coefficient_count
  config['use_energy'] = args.use_energy

  mfcc = Mfcc.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier,
                          segments=args.segments) as reader, \
        KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                    compress=args.compress, compression_method=args.compression_method) as writer:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      mfcc_test = tf.squeeze(mfcc(audio_data, args.sample_rate))
      sess = tf.Session()
      mfcc_feats = mfcc_test.eval(session=sess)
      writer[utt_id] = mfcc_feats

didi/delta: utils/speech/compute_cmvn_stats.py (view on GitHub)
  cmvn_stats = {}
  for spk in counts:
    feat_shape = sum_feats[spk].shape
    cmvn_shape = (2, feat_shape[0] + 1) + feat_shape[1:]
    _cmvn_stats = np.empty(cmvn_shape, dtype=np.float64)
    _cmvn_stats[0, :-1] = sum_feats[spk]
    _cmvn_stats[1, :-1] = square_sum_feats[spk]

    _cmvn_stats[0, -1] = counts[spk]
    _cmvn_stats[1, -1] = 0.

    cmvn_stats[spk] = _cmvn_stats

  if is_wspecifier:
    with KaldiWriter(args.wspecifier_or_wxfilename) as writer:
      for spk, mat in cmvn_stats.items():
        writer[spk] = mat
  else:
    matrix = cmvn_stats[None]
    kaldiio.save_mat(args.wspecifier_or_wxfilename, matrix)
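
As a quick sanity check, the stats written by either branch can be read back with kaldiio, which this script already imports. A minimal sketch with placeholder paths (not taken from the example):

import kaldiio

# wspecifier branch: iterate the archive written by KaldiWriter.
with kaldiio.ReadHelper('ark:cmvn.ark') as reader:
    for spk, stats in reader:
        print(spk, stats.shape)  # (2, feat_dim + 1) per speaker

# wxfilename branch: load the single matrix written by kaldiio.save_mat.
stats = kaldiio.load_mat('global_cmvn.mat')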

didi/delta: utils/speech/compute_spectrum_feats.py (view on GitHub)
def compute_spectrum():
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = float(args.sample_rate)
  config['output_type'] = int(args.output_type)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length

  spectrum = Spectrum.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier,
                          segments=args.segments) as reader, \
        KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                    compress=args.compress, compression_method=args.compression_method) as writer:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      spectrum_test = spectrum(audio_data, args.sample_rate)
      sess = tf.compat.v1.Session()
      spectrum_feats = spectrum_test.eval(session=sess)
      writer[utt_id] = spectrum_feats

didi/delta: utils/speech/compute_fbank_feats.py (view on GitHub)
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = float(args.sample_rate)
  config['upper_frequency_limit'] = float(args.upper_frequency_limit)
  config['lower_frequency_limit'] = float(args.lower_frequency_limit)
  config['filterbank_channel_count'] = float(args.filterbank_channel_count)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length
  config['output_type'] = args.output_type

  fbank = Fbank.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier,
                          segments=args.segments) as reader, \
        KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                    compress=args.compress, compression_method=args.compression_method) as writer:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      fbank_test = tf.squeeze(fbank(audio_data, args.sample_rate))
      sess = tf.compat.v1.Session()
      fbank_feats = fbank_test.eval(session=sess)
      writer[utt_id] = fbank_feats

didi/delta: utils/speech/compute_stft_feats.py (view on GitHub)
def compute_stft():
  parser = get_parser()
  args = parser.parse_args()

  config = {}
  config['sample_rate'] = int(args.sample_rate)
  config['window_length'] = args.window_length
  config['frame_length'] = args.frame_length

  stft = Analyfiltbank.params(config).instantiate()

  with kaldiio.ReadHelper(args.rspecifier,
                          segments=args.segments) as reader, \
        KaldiWriter(args.wspecifier, write_num_frames=args.write_num_frames,
                    compress=args.compress, compression_method=args.compression_method) as writer:
    for utt_id, (sample_rate, array) in reader:
      if sample_rate != args.sample_rate:
        args.sample_rate = sample_rate
      array = array.astype(np.float32)
      audio_data = tf.constant(array, dtype=tf.float32)
      power_spectrum, phase_spectrum = stft(audio_data, args.sample_rate)
      sess = tf.Session()
      if args.output_type == 1:
        out_feats = power_spectrum.eval(session=sess)
      else:
        out_feats = phase_spectrum.eval(session=sess)
      writer[utt_id] = out_feats