Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def check_transcript(transcript):
    """
    Load a transcript file, exiting the process if it fails validation.

    Returns the result of ``time_aligned_text(input_data=transcript)`` when
    ``valid_input_file(transcript)`` is truthy. Otherwise logs an error and
    calls ``sys.exit(1)``, which raises ``SystemExit``.
    """
    if valid_input_file(transcript):
        return time_aligned_text(input_data=transcript)

    # Hand the path to the logger as a lazy %-style argument so the message
    # is only formatted when the record is actually emitted.
    LOGGER.error("Invalid transcript file %s", transcript)
    sys.exit(1)
def degrade_audio(source_audio_file, target_audio_file=None):
"""
Degrades audio to typical G711 level.
Useful if models need to target this audio quality.
"""
valid_input_file(source_audio_file, ["mp3", "sph", "wav", "au", "raw"])
target_audio_file = (source_audio_file
if target_audio_file is None else target_audio_file)
# degrade to 8k
tmp1 = ".".join(source_audio_file.split(".")[:-1]) + "_tmp1.wav"
subprocess.call(
"sox -V1 {} -r 8000 -e a-law {}".format(source_audio_file, tmp1),
shell=True,
)
# convert to u-law
tmp2 = ".".join(source_audio_file.split(".")[:-1]) + "_tmp2.wav"
subprocess.call(
"sox -V1 {} --rate 8000 -e u-law {}".format(tmp1, tmp2),
shell=True,
def validate_audio_file(source_audio_file):
    """
    Exit if invalid audio file

    Passes ``source_audio_file`` to ``valid_input_file`` together with the
    accepted extensions (mp3, sph, wav, au, raw). When that check is falsy,
    an error is logged and ``sys.exit(1)`` is called; otherwise the function
    returns ``None``.
    """
    accepted_extensions = ["mp3", "sph", "wav", "au", "raw"]
    if valid_input_file(source_audio_file, accepted_extensions):
        return

    LOGGER.error("Invalid audio file %s", source_audio_file)
    sys.exit(1)
def check_audio_file(audio_file_name):
    """
    Load an audio file, exiting the process if it fails validation.

    Returns ``audio_file(audio_file_name)`` when ``valid_input_file`` accepts
    the name with one of the extensions mp3, sph, wav, au or raw. Otherwise
    logs an error and calls ``sys.exit(1)``, which raises ``SystemExit``.
    """
    if valid_input_file(audio_file_name, ["mp3", "sph", "wav", "au", "raw"]):
        return audio_file(audio_file_name)

    # Hand the name to the logger as a lazy %-style argument so the message
    # is only formatted when the record is actually emitted.
    LOGGER.error("Invalid audio file %s", audio_file_name)
    sys.exit(1)
def validate_transcript(transcript):
    """
    Exit if invalid transcript

    Logs an error and calls ``sys.exit(1)`` when ``valid_input_file`` returns
    a falsy value for ``transcript``; otherwise returns ``None``.
    """
    if valid_input_file(transcript):
        return

    LOGGER.error("Invalid transcript file %s", transcript)
    sys.exit(1)
def clean_text_file(*input_text_files):
    """
    Cleans input *.txt files and outputs *_cleaned.txt

    Each argument is checked with ``valid_input_file`` against the ``txt``
    extension. Accepted files are passed to ``clean_one_file`` and the
    derived output name is logged; rejected files are reported with an
    error and skipped.
    """
    for text_path in input_text_files:
        if valid_input_file(text_path, valid_extensions=["txt"]):
            clean_one_file(text_path)
            cleaned_name = text_path.replace(".txt", "_cleaned.txt")
            LOGGER.info("File output: %s", cleaned_name)
        else:
            LOGGER.error(
                "File %s does not end in .txt - please only use this for cleaning txt files",
                text_path,
            )
def degrade_all_files(*audio_files):
    """
    Degrade all audio files given as arguments (in place by default)

    Every name accepted by ``valid_input_file`` (extensions mp3, sph, wav,
    au, raw) is handed to ``degrade_audio``; any other name is logged as an
    error and skipped.
    """
    for audio_path in audio_files:
        if not valid_input_file(audio_path, ["mp3", "sph", "wav", "au", "raw"]):
            LOGGER.error("Invalid input file %s", audio_path)
            continue
        degrade_audio(audio_path)