How to use the nnmnkwii.datasets.FileDataSource class in nnmnkwii

To help you get started, we’ve selected a few nnmnkwii.datasets.FileDataSource examples, based on popular ways it is used in public projects.
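
FileDataSource is an abstract interface with two methods: collect_files, which returns the per-utterance items (usually file paths), and collect_features, which turns one collected item into a feature array. Wrapping a data source in FileSourceDataset then gives lazy, list-like access to the features. A minimal sketch of the pattern; the class name, directory path, and .npy feature choice here are illustrative, not taken from the projects below:

import numpy as np
from os import listdir
from os.path import join, splitext

from nnmnkwii.datasets import FileDataSource, FileSourceDataset


class NPYFileDataSource(FileDataSource):
    """Hypothetical data source: collects .npy files and loads them as features."""

    def __init__(self, data_root):
        self.data_root = data_root

    def collect_files(self):
        # Return one item per utterance; these items are what
        # collect_features receives later.
        return sorted(join(self.data_root, f) for f in listdir(self.data_root)
                      if splitext(f)[1] == ".npy")

    def collect_features(self, path):
        # Called lazily when the wrapped dataset is indexed.
        return np.load(path)


X = FileSourceDataset(NPYFileDataSource("data/features"))  # path is illustrative
print(len(X))       # number of collected files
print(X[0].shape)   # features of the first file, computed on access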

github r9y9 / nnmnkwii / tests / test_datasets.py
def test_empty_dataset():
    class EmptyDataSource(FileDataSource):
        def collect_files(self):
            return []

        def collect_features(self, path):
            pass
    X = FileSourceDataset(EmptyDataSource())

    def __test_outof_range(X):
        print(X[0])

    # Should raise IndexError
    yield raises(IndexError)(__test_outof_range), X
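
The test above uses nose-style yield tests (yield raises(...)); with pytest the same out-of-range check can be written directly. This is a sketch, not part of the nnmnkwii test suite:

import pytest
from nnmnkwii.datasets import FileDataSource, FileSourceDataset


class EmptyDataSource(FileDataSource):
    def collect_files(self):
        return []

    def collect_features(self, path):
        pass


def test_empty_dataset_out_of_range():
    X = FileSourceDataset(EmptyDataSource())
    assert len(X) == 0
    # Indexing past the collected files raises IndexError, as in the test above.
    with pytest.raises(IndexError):
        X[0]
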
github Sharad24 / Neural-Voice-Cloning-with-Few-Samples / deepvoice3_pytorch / train.py
alignment,
        aspect='auto',
        origin='lower',
        interpolation='none')
    fig.colorbar(im, ax=ax)
    xlabel = 'Decoder timestep'
    if info is not None:
        xlabel += '\n\n' + info
    plt.xlabel(xlabel)
    plt.ylabel('Encoder timestep')
    plt.tight_layout()
    plt.savefig(path, format='png')
    plt.close()


class TextDataSource(FileDataSource):
    def __init__(self, data_root, speaker_id=None):
        self.data_root = data_root
        self.speaker_ids = None
        self.multi_speaker = False
        # If not None, filter by speaker_id
        self.speaker_id = speaker_id

    def collect_files(self):
        meta = join(self.data_root, "train.txt")
        with open(meta, "rb") as f:
            lines = f.readlines()
        l = lines[0].decode("utf-8").split("|")
        assert len(l) == 4 or len(l) == 5
        self.multi_speaker = len(l) == 5
        texts = list(map(lambda l: l.decode("utf-8").split("|")[3], lines))
        if self.multi_speaker:
github andi611 / TTS-Tacotron-Pytorch / dataloader.py
import os
import numpy as np
#----------------#
import torch
from torch.utils import data
from torch.autograd import Variable
#---------------------------------#
from config import config
from utils.text import text_to_sequence
#-------------------------------------#
from nnmnkwii.datasets import FileSourceDataset, FileDataSource


####################
# TEXT DATA SOURCE #
####################
class TextDataSource(FileDataSource):
	def __init__(self, data_root, meta_text):
		self.data_root = data_root
		self.meta_text = meta_text
		#self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]

	def collect_files(self):
		meta = os.path.join(self.data_root, self.meta_text)
		with open(meta, 'r', encoding='utf-8') as f:
			lines = f.readlines()
		lines = list(map(lambda l: l.split("|")[-1][:-1], lines))
		return lines

	def collect_features(self, text):
		return np.asarray(text_to_sequence(text), dtype=np.int32)
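
In dataloader.py the data source above is then wrapped in a FileSourceDataset and handed to a PyTorch DataLoader together with a collate function that pads the variable-length sequences. A simplified sketch of that wiring; the paths, batch size, and collate body are placeholders, not the project's exact configuration:

from torch.utils import data
from nnmnkwii.datasets import FileSourceDataset


def collate_fn(batch):
    # Placeholder: a real collate function pads each sequence in the batch
    # to the length of the longest one and stacks them into a tensor.
    return batch


X = FileSourceDataset(TextDataSource(data_root="data", meta_text="meta_text.txt"))
loader = data.DataLoader(X, batch_size=16, shuffle=True, collate_fn=collate_fn)

for batch in loader:
    # Each element of batch is the int32 array returned by collect_features.
    break
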
github SforAiDl / Neural-Voice-Cloning-With-Few-Samples / speaker_adaptatation-libri.py
else:
            return texts

    def collect_features(self, *args):
        if self.multi_speaker:
            text, speaker_id = args
        else:
            text = args[0]
        seq = _frontend.text_to_sequence(text, p=hparams.replace_pronunciation_prob)
        if self.multi_speaker:
            return np.asarray(seq, dtype=np.int32), int(speaker_id)
        else:
            return np.asarray(seq, dtype=np.int32)

class _NPYDataSource(FileDataSource):
    def __init__(self, data_root, mel_or_spec, col, speaker_id=None, train_set=0):

        self.data_root = data_root
        self.col = col
        self.frame_lengths = []
        self.speaker_id = speaker_id
        self.train_set = train_set
        self.mel_or_spec = mel_or_spec

    def collect_files(self):
        meta = join(self.data_root, "train.txt")
        with open(meta, "rb") as f:
            lines = f.readlines()
        l = lines[0].decode("utf-8").split("|")
        assert len(l) == 4 or len(l) == 5
        multi_speaker = len(l) == 5
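
The _NPYDataSource excerpts cut off before collect_features, but in these DeepVoice3-style scripts the acoustic features are precomputed and stored as .npy files, so loading one is essentially a call to np.load. A minimal sketch of that pattern, not any project's verbatim code:

import numpy as np
from os.path import join

from nnmnkwii.datasets import FileDataSource


class MelSpecDataSource(FileDataSource):
    """Hypothetical source for precomputed mel-spectrogram .npy files."""

    def __init__(self, data_root, filenames):
        self.data_root = data_root
        self.filenames = filenames  # e.g. parsed out of train.txt as above

    def collect_files(self):
        return [join(self.data_root, f) for f in self.filenames]

    def collect_features(self, path):
        return np.load(path)
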
github PaddlePaddle / models / PaddleSpeech / DeepVoice3 / deepvoice3_paddle / data.py
text, p=hparams.replace_pronunciation_prob)

        if platform.system() == "Windows":
            if hasattr(hparams, "gc_probability"):
                _frontend = None  # memory leaking prevention in Windows
                if np.random.rand() < hparams.gc_probability:
                    gc.collect()  # garbage collection enforced
                    print("GC done")

        if self.multi_speaker:
            return np.asarray(seq, dtype=np.int32), int(speaker_id)
        else:
            return np.asarray(seq, dtype=np.int32)


class _NPYDataSource(FileDataSource):
    def __init__(self, data_root, col, speaker_id=None):
        self.data_root = data_root
        self.col = col
        self.frame_lengths = []
        self.speaker_id = speaker_id

    def collect_files(self):
        meta = join(self.data_root, "train.txt")
        with io.open(meta, "rt", encoding="utf-8") as f:
            lines = f.readlines()
        l = lines[0].split("|")
        assert len(l) == 4 or len(l) == 5
        multi_speaker = len(l) == 5
        self.frame_lengths = list(map(lambda l: int(l.split("|")[2]), lines))

        paths = list(map(lambda l: l.split("|")[self.col], lines))
github PaddlePaddle / models / PaddleSpeech / DeepVoice3 / deepvoice3_paddle / data.py
def _pad(seq, max_len, constant_values=0):
    return np.pad(seq, (0, max_len - len(seq)),
                  mode="constant",
                  constant_values=constant_values)


def _pad_2d(x, max_len, b_pad=0):
    x = np.pad(x, [(b_pad, max_len - len(x) - b_pad), (0, 0)],
               mode="constant",
               constant_values=0)
    return x


class TextDataSource(FileDataSource):
    def __init__(self, data_root, speaker_id=None):
        self.data_root = data_root
        self.speaker_ids = None
        self.multi_speaker = False
        # If not None, filter by speaker_id
        self.speaker_id = speaker_id

    def collect_files(self):
        meta = join(self.data_root, "train.txt")
        with io.open(meta, "rt", encoding="utf-8") as f:
            lines = f.readlines()
        l = lines[0].split("|")
        assert len(l) == 4 or len(l) == 5
        self.multi_speaker = len(l) == 5
        texts = list(map(lambda l: l.split("|")[3], lines))
        if self.multi_speaker:
github Sharad24 / Neural-Voice-Cloning-with-Few-Samples / deepvoice3_pytorch / train.py
else:
            return texts

    def collect_features(self, *args):
        if self.multi_speaker:
            text, speaker_id = args
        else:
            text = args[0]
        seq = _frontend.text_to_sequence(text, p=hparams.replace_pronunciation_prob)
        if self.multi_speaker:
            return np.asarray(seq, dtype=np.int32), int(speaker_id)
        else:
            return np.asarray(seq, dtype=np.int32)


class _NPYDataSource(FileDataSource):
    def __init__(self, data_root, col, speaker_id=None):
        self.data_root = data_root
        self.col = col
        self.frame_lengths = []
        self.speaker_id = speaker_id

    def collect_files(self):
        meta = join(self.data_root, "train.txt")
        with open(meta, "rb") as f:
            lines = f.readlines()
        l = lines[0].decode("utf-8").split("|")
        assert len(l) == 4 or len(l) == 5
        multi_speaker = len(l) == 5
        self.frame_lengths = list(
            map(lambda l: int(l.decode("utf-8").split("|")[2]), lines))
github r9y9 / wavenet_vocoder / train.py
if c is not None:
            raise RuntimeError("WaveNet expects no conditional features, but given")


def _pad(seq, max_len, constant_values=0):
    return np.pad(seq, (0, max_len - len(seq)),
                  mode='constant', constant_values=constant_values)


def _pad_2d(x, max_len, b_pad=0, constant_values=0):
    x = np.pad(x, [(b_pad, max_len - len(x) - b_pad), (0, 0)],
               mode="constant", constant_values=constant_values)
    return x


class _NPYDataSource(FileDataSource):
    def __init__(self, data_root, col, speaker_id=None,
                 train=True, test_size=0.05, test_num_samples=None, random_state=1234):
        self.data_root = data_root
        self.col = col
        self.lengths = []
        self.speaker_id = speaker_id
        self.multi_speaker = False
        self.speaker_ids = None
        self.train = train
        self.test_size = test_size
        self.test_num_samples = test_num_samples
        self.random_state = random_state

    def interest_indices(self, paths):
        indices = np.arange(len(paths))
        if self.test_size is None:
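
The excerpt stops inside interest_indices. Given the test_size, test_num_samples, and random_state fields set in __init__, one plausible completion splits the collected paths deterministically with scikit-learn's train_test_split; this is a sketch of the idea, not necessarily the project's exact code:

import numpy as np
from sklearn.model_selection import train_test_split


def interest_indices(self, paths):
    indices = np.arange(len(paths))
    if self.test_size is None:
        test_size = self.test_num_samples / len(paths)
    else:
        test_size = self.test_size
    train_indices, test_indices = train_test_split(
        indices, test_size=test_size, random_state=self.random_state)
    # The same data source class serves both splits; `train` picks one.
    return train_indices if self.train else test_indices
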
github r9y9 / nnmnkwii / nnmnkwii / datasets / vcc2016.py
# coding: utf-8
from __future__ import with_statement, print_function, absolute_import

from nnmnkwii.datasets import FileDataSource

import numpy as np
from os.path import join, splitext, isdir
from os import listdir

# List of available speakers.
available_speakers = [
    "SF1", "SF2", "SF3", "SM1", "SM2", "TF1", "TF2", "TM1", "TM2", "TM3"]


class WavFileDataSource(FileDataSource):
    """Wav file data source for Voice Conversion Challenge (VCC) 2016 dataset.

    The data source collects wav files from VCC2016 dataset.
    Users are expected to inherit the class and implement ``collect_features``
    method, which defines how features are computed given a wav file path.

    .. note::
        VCC2016 datasets are composed of training data and evaluation data,
        which can be downloaded separately. ``data_root`` should point to the
        directory that contains both the training and evaluation data.


    Directory structure should look like for example:

    .. code-block:: shell
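
The excerpt ends before the example directory listing, but the docstring's contract is clear: subclass WavFileDataSource and implement collect_features. A hedged sketch of such a subclass, reading the raw waveform with scipy; the constructor arguments and data_root shown are assumptions based on the other built-in nnmnkwii sources, which take a data root and a list of speakers:

import numpy as np
from scipy.io import wavfile

from nnmnkwii.datasets import FileSourceDataset
from nnmnkwii.datasets import vcc2016


class VCC2016WavSource(vcc2016.WavFileDataSource):
    def collect_features(self, path):
        fs, x = wavfile.read(path)
        # Return the raw waveform; a real pipeline would extract spectral
        # features (e.g. mel-cepstrum) here instead.
        return x.astype(np.float64)


# data_root must contain both the training and evaluation data (see note above).
X = FileSourceDataset(VCC2016WavSource("data/vcc2016", speakers=["SF1"]))
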
github mertcokluk / GlotNET / t.py
if c is not None:
            raise RuntimeError("WaveNet expects no conditional features, but given")


def _pad(seq, max_len, constant_values=0):
    return np.pad(seq, (0, max_len - len(seq)),
                  mode='constant', constant_values=constant_values)


def _pad_2d(x, max_len, b_pad=0):
    x = np.pad(x, [(b_pad, max_len - len(x) - b_pad), (0, 0)],
               mode="constant", constant_values=0)
    return x


class _NPYDataSource(FileDataSource):
    def __init__(self, data_root, col, speaker_id=None,
                 train=True, test_size=0.05, test_num_samples=None, random_state=1234):
        self.data_root = data_root
        self.col = col
        self.lengths = []
        self.speaker_id = speaker_id
        self.multi_speaker = False
        self.speaker_ids = None
        self.train = train
        self.test_size = test_size
        self.test_num_samples = test_num_samples
        self.random_state = random_state

    def interest_indices(self, paths):
        indices = np.arange(len(paths))
        if self.test_size is None:
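
A pattern shared by several of the training scripts above is to build one FileSourceDataset per stream (text, mel spectrogram, linear spectrogram) and zip them in a small map-style dataset for PyTorch. A condensed sketch of that idea; the class and variable names are illustrative:

from torch.utils import data
from nnmnkwii.datasets import FileSourceDataset


class PairedDataset(data.Dataset):
    """Zips two data sources that were built from the same metadata file."""

    def __init__(self, text_source, mel_source):
        self.X = FileSourceDataset(text_source)
        self.Mel = FileSourceDataset(mel_source)
        assert len(self.X) == len(self.Mel)

    def __getitem__(self, idx):
        return self.X[idx], self.Mel[idx]

    def __len__(self):
        return len(self.X)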