Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def parse_line(line):
    """
    Build a segment from one whitespace-separated line of an stm file.

    :param line: str; a single line of an stm file
    :return: segment object when the line carries the six leading fields
        plus at least one word of text and the segment validates; else None
    """
    tokens = line.strip().split()

    # Six header fields followed by at least one word of transcript text.
    if len(tokens) <= 6:
        return None

    header, words = tokens[:6], tokens[6:]
    filename, channel, speaker, start, stop, label = header

    seg = segment({
        "filename": filename,
        "channel": channel,
        "speaker": speaker,
        "start": start,
        "stop": stop,
        "label": label,
        "text": " ".join(words),
    })

    if seg is not None and seg.validate():
        return seg
    return None
def parse_line(line):
    """
    Parse a single row of an html file into a segment.

    :param line: row object exposing ``findAll("td")`` whose cells expose
        ``children`` (presumably a BeautifulSoup tag -- confirm against caller)
    :return: segment object when the row has exactly three non-empty cells
        (time range, speaker, text), the time range splits into a start and a
        stop around " - ", and the segment validates; else None
    """
    cols = line.findAll("td")

    # Rows without exactly three cells cannot be unpacked into
    # (time range, speaker, text); treat them as malformed.
    if not cols or len(cols) != 3:
        return None

    first_children = []
    for col in cols:
        children = list(col.children)
        if not children:
            # An empty cell has no content to read.
            return None
        first_children.append(children[0])
    start_stop, speaker, text = first_children

    # Drop the single wrapping character on each side of the time range
    # before splitting it into its two bounds.
    bounds = start_stop[1:-1].split(" - ")
    if len(bounds) != 2:
        return None
    start, stop = bounds

    seg = segment({
        "speaker": speaker,
        "start": start,
        "stop": stop,
        "text": text
    })
    return seg if seg.validate() else None
def read_in_memory(input_data):
    """
    Turn in-memory text into a list of segments, one per line.

    :param input_data: str; text whose lines each become one segment
    :return: list of segment objects, each built from the stripped line
        stored under the "text" key
    """
    return [segment({"text": raw.strip()}) for raw in input_data.splitlines()]
seg = None
try:
assign_if_present("channel")
assign_if_present("startTimeSec", "start")
assign_if_present("stopTimeSec", "stop")
assign_if_present("endTimeSec", "stop")
assign_if_present("transcript", "text")
assign_if_present("corrected_transcript", "text")
assign_if_present("formatted_transcript", "formatted_text")
assign_if_present("punctuated_transcript", "formatted_text")
assign_if_present("speakerInfo", "speaker", "ID")
assign_if_present("genderInfo", "label", "gender",
lambda gender: "".format(gender))
assign_if_present("confidence", "confidence")
seg = segment(extracted_dict)
except Exception as exc:
LOGGER.exception(exc)
return seg if seg and seg.validate() else None
def read_caption(caption):
    """
    Convert a caption object into a segment object.

    :param caption: object exposing ``start_in_seconds``, ``end_in_seconds``
        and ``text``
    :return: segment object if it could be built and validates; else None
    """
    try:
        seg = segment({
            "start": caption.start_in_seconds,
            "stop": caption.end_in_seconds,
            # Remove everything matching non_transcript_marks, then trim the
            # whitespace that removal may leave at the edges.
            "text": re.sub(
                non_transcript_marks, "", caption.text.strip()
            ).strip(),
        })
    except Exception as exc:
        # Any failure while reading the caption is logged and yields None.
        LOGGER.exception(exc)
        return None

    if seg and seg.validate():
        return seg
    return None