How to use the asteroid.data.avspeech_dataset.AVSpeechDataset class in asteroid

To help you get started, we've selected a few examples of how AVSpeechDataset is used in public projects.

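As the snippets below show, AVSpeechDataset is constructed from a CSV index file, a directory of precomputed face embeddings, and the number of sources to separate. A minimal instantiation sketch; the CSV path, EMBED_DIR value, and n_src=2 are illustrative assumptions, not values from the recipes:

from pathlib import Path

from asteroid.data.avspeech_dataset import AVSpeechDataset

EMBED_DIR = "data/embeddings"  # assumed location of precomputed face embeddings
n_src = 2                      # assumed number of speakers to separate

train_set = AVSpeechDataset(Path("data/train.csv"), Path(EMBED_DIR), n_src)
sample = train_set[0]  # fetch one example via the standard Dataset protocol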

github mpariente/AsSteroid: egs/avspeech/looking-to-listen/eval.py
def main(conf):
    # Collect run-time options from the parsed config.
    config = ParamConfig(
        conf["training"]["batch_size"],
        conf["training"]["epochs"],
        conf["training"]["num_workers"],
        cuda=True,
        use_half=False,
        learning_rate=conf["optim"]["lr"],
    )

    # Validation set: CSV index, precomputed face embeddings, number of sources.
    val_dataset = AVSpeechDataset(
        Path("data/val.csv"), Path(EMBED_DIR), conf["main_args"]["n_src"]
    )

    # Restore the best checkpoint from the experiment directory.
    model = load_best_model(conf, conf["main_args"]["exp_dir"])

    print(
        f"AVFusion has {sum(np.prod(i.shape) for i in model.parameters()):,} parameters"
    )

    validate(model, val_dataset, config)
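Since AVSpeechDataset is used as a standard torch Dataset in these recipes, it can also be wrapped in a DataLoader for batched evaluation. A short sketch; the paths, batch size, and worker count are illustrative (the recipes read them from conf):

from pathlib import Path

from torch.utils.data import DataLoader

from asteroid.data.avspeech_dataset import AVSpeechDataset

# Illustrative values; adjust to your own data layout and config.
val_dataset = AVSpeechDataset(Path("data/val.csv"), Path("data/embeddings"), 2)
val_loader = DataLoader(val_dataset, batch_size=8, num_workers=4, shuffle=False)

for batch in val_loader:
    # Batch structure follows whatever AVSpeechDataset.__getitem__ returns.
    pass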
github mpariente/AsSteroid: egs/avspeech/looking-to-listen/train.py
def main(conf):
    # Collect run-time options from the parsed config.
    config = ParamConfig(
        conf["training"]["batch_size"],
        conf["training"]["epochs"],
        conf["training"]["num_workers"],
        cuda=True,
        use_half=False,
        learning_rate=conf["optim"]["lr"],
    )

    # Train and validation sets are built the same way:
    # CSV index, directory of precomputed face embeddings, number of sources.
    dataset = AVSpeechDataset(
        Path("data/train.csv"), Path(EMBED_DIR), conf["main_args"]["n_src"]
    )
    val_dataset = AVSpeechDataset(
        Path("data/val.csv"), Path(EMBED_DIR), conf["main_args"]["n_src"]
    )

    model, optimizer = make_model_and_optimizer(conf)
    print(
        f"AVFusion has {sum(np.prod(i.shape) for i in model.parameters()):,} parameters"
    )

    criterion = DiscriminativeLoss()

    # Resume from the best checkpoint if one already exists in the experiment dir.
    model_path = Path(conf["main_args"]["exp_dir"]) / "checkpoints" / "best_full.pth"
    if model_path.is_file():
        print("Loading saved model...")
        resume = model_path.as_posix()
    else:
        resume = None
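A small aside: the parameter count in these snippets goes through numpy. Continuing from the model built above, an equivalent pure-PyTorch version uses Tensor.numel() and avoids the extra dependency at this spot:

# Equivalent to sum(np.prod(i.shape) for i in model.parameters()), without numpy.
n_params = sum(p.numel() for p in model.parameters())
print(f"AVFusion has {n_params:,} parameters")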
github mpariente/AsSteroid: egs/avspeech/looking-to-listen/train/metric_utils.py
"""
        Calculate the Signal-to-Distortion Ratio
        from two signals

        Args:
            pred_signal (torch.Tensor): predicted signal spectrogram.
            true_signal (torch.Tensor): original signal spectrogram.

    """
    n_sources = pred_signal.shape[0]

    y_pred_wav = np.zeros((n_sources, 48_000))
    y_wav = np.zeros((n_sources, 48_000))

    for i in range(n_sources):
        y_pred_wav[i] = AVSpeechDataset.decode(pred_signal[i, ...]).numpy()
        y_wav[i] = AVSpeechDataset.decode(true_signal[i, ...]).numpy()
    sdr, sir, sar, _ = mir_eval.separation.bss_eval_sources(y_wav, y_pred_wav)

    return sdr
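For reference, mir_eval.separation.bss_eval_sources can also be called directly on waveform arrays, and the argument order matters: references first, estimates second. A self-contained sketch with synthetic signals (shapes and noise level are illustrative):

import mir_eval
import numpy as np

rng = np.random.default_rng(0)
reference = rng.standard_normal((2, 48_000))                    # two reference sources
estimate = reference + 0.1 * rng.standard_normal((2, 48_000))   # noisy estimates

# Returns per-source SDR, SIR, SAR (in dB) and the best source permutation.
sdr, sir, sar, perm = mir_eval.separation.bss_eval_sources(reference, estimate)
print(sdr)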