How to use the asrtoolkit.data_structures.audio_file.audio_file class in asrtoolkit

To help you get started, we’ve selected a few asrtoolkit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: finos/greenkey-asrtoolkit, asrtoolkit/data_structures/audio_file.py (view on GitHub)
def prepare_for_training(self, file_name, sample_rate=16000):
        """
        Convert this audio file into a single-channel SPH file for training.

        The conversion is delegated to the external ``sox`` program, which
        reads ``self.location`` and writes the result to ``file_name``.

        :param file_name: destination path; if its extension is not "sph"
            a warning is logged and the extension is replaced with ".sph".
            The path is then passed through ``sanitize_hyphens``.
        :param sample_rate: value handed to sox's ``rate`` effect
            (default 16000).
        :return: an ``audio_file`` object for the converted file when sox
            exits with status 0, otherwise ``None``.

        NOTE(review): the previous docstring said the output is taken
        "from channel 1", but the command uses ``remix -``, which per the
        sox manual mixes all input channels down to one -- confirm which
        behaviour is intended.
        """
        if file_name.split(".")[-1] != "sph":
            LOGGER.warning(
                "Forcing training data to use SPH file format for %s",
                file_name)
            file_name = strip_extension(file_name) + ".sph"

        file_name = sanitize_hyphens(file_name)

        # Pass the arguments as a list (no shell) so that paths containing
        # spaces or shell metacharacters reach sox unmodified and cannot be
        # interpreted as shell syntax.
        command = [
            "sox",
            "-V1",
            str(self.location),
            str(file_name),
            "rate",
            str(sample_rate),
            "remix",
            "-",
        ]

        try:
            return_code = subprocess.call(command)
        except OSError:
            # Without a shell, a missing or non-executable sox raises instead
            # of yielding a non-zero status; keep the "None on failure"
            # contract for callers.
            LOGGER.error("Unable to run sox to convert %s", self.location)
            return None

        # return None if error code given, otherwise return audio_file object
        return audio_file(file_name) if return_code == 0 else None
Source: finos/greenkey-asrtoolkit, asrtoolkit/combine_audio_files.py (view on GitHub)
def check_audio_file(audio_file_name):
    """
    Wrap ``audio_file_name`` in an ``audio_file`` object if
    ``valid_input_file`` accepts it for the listed extensions;
    otherwise log an error and exit the process with status 1.
    """
    accepted_extensions = ["mp3", "sph", "wav", "au", "raw"]

    if valid_input_file(audio_file_name, accepted_extensions):
        return audio_file(audio_file_name)

    # Not an accepted input: report it and terminate.
    LOGGER.error("Invalid audio file {}".format(audio_file_name))
    sys.exit(1)
Source: finos/greenkey-asrtoolkit, asrtoolkit/split_audio_file.py (view on GitHub)
def split_audio_file(source_audio_file,
                     source_transcript,
                     target_directory="split"):
    """
    Cut the source audio into the segments described by the transcript.

    Loads ``source_audio_file`` as an ``audio_file`` and
    ``source_transcript`` as a ``time_aligned_text``, then asks the audio
    object to split itself into ``target_directory`` (default "split").
    Per the original documentation, this produces stm and sph files in
    the target directory.
    """
    audio = audio_file(source_audio_file)
    segments = time_aligned_text(source_transcript)
    audio.split(segments, target_directory)
Source: finos/greenkey-asrtoolkit, asrtoolkit/data_structures/corpus.py (view on GitHub)
exemplar({
                    "audio_file":
                    audio_file(fl),
                    "transcript_file":
                    time_aligned_text(strip_extension(fl) + ".stm"),
                }) for audio_extension in audio_extensions_to_try
                for fl in (get_files(self.location, audio_extension) if self.
                           location else [])
                if (os.path.exists(strip_extension(fl) + ".stm"))
            ]

            # gather all exemplars from /stm and /sph subdirectories if present
            self.exemplars += [
                exemplar({
                    "audio_file":
                    audio_file(fl),
                    "transcript_file":
                    time_aligned_text(self.location + "/stm/" +
                                      basename(strip_extension(fl)) + ".stm"),
                }) for audio_extension in audio_extensions_to_try for fl in
                (get_files(self.location +
                           "/sph/", audio_extension) if self.location else [])
                if (os.path.exists(self.location + "/stm/" +
                                   basename(strip_extension(fl)) + ".stm"))
            ]