Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def extract_xlsx(filename, target_folder):
    """
    For an excel spreadsheet, extract to a text file

    Every sheet of the workbook is parsed and its values array is handed
    to ``dump_sheet`` together with the open output file.

    Input
      filename, str - path of the excel workbook to read
      target_folder, str - folder in which the text file is created; the
        output is named after the sanitized, extension-stripped basename
        of ``filename`` with a ".txt" suffix
    """
    # Use the workbook as a context manager so its underlying file handle
    # is released even if parsing or writing a sheet raises.
    with pd.ExcelFile(filename) as workbook:
        raw_name = sanitize(strip_extension(basename(filename)))
        output_path = f"{target_folder}/{raw_name}.txt"

        # "a+" appends: an existing text file of the same name is extended,
        # not overwritten.
        with open(output_path, "a+") as output_file:
            for sheet_name in workbook.sheet_names:
                dump_sheet(output_file, workbook.parse(sheet_name).values)
CLI for forced alignment tools
Using a reference txt file and a hypothesis gk json
file, this time-aligns the reference txt file
and outputs an STM file
Input
ref_txt, str - reference text file containing ground truth
json_file, str - hypothesis gk JSON file
filename, str - output STM filename
"""
ref_tokens = preprocess_txt.parse_transcript(ref_txt)
gk_json = preprocess_gk_json.preprocess_transcript(json_file)
segments = align(gk_json, ref_tokens)
if filename is None:
filename = basename(sanitize(strip_extension(ref_txt))) + ".stm"
# fix segment filename and speaker
for seg in segments:
seg.filename = strip_extension(filename)
seg.speaker = strip_extension(filename) + "UnknownSpeaker"
output = time_aligned_text()
output.segments = segments
output.write(filename)