How to use the asrtoolkit.data_structures.segment.segment function in asrtoolkit

To help you get started, we’ve selected a few asrtoolkit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github finos / greenkey-asrtoolkit / asrtoolkit / data_handlers / stm.py View on Github external
def parse_line(line):
    """
    Turn one whitespace-delimited stm line into a segment.

    The first six tokens are taken as filename, channel, speaker, start,
    stop and label; every remaining token is re-joined with single spaces
    to form the text.

    :param line: str; a single line of an stm file
    :return: segment object if the line has more than six tokens and the
        resulting segment passes its own validate() check; else None
    """
    tokens = line.strip().split()

    # Six header fields plus at least one token of text are required.
    if len(tokens) <= 6:
        return None

    header_keys = ("filename", "channel", "speaker", "start", "stop", "label")
    fields = dict(zip(header_keys, tokens[:6]))
    fields["text"] = " ".join(tokens[6:])

    candidate = segment(fields)
    if candidate is not None and candidate.validate():
        return candidate
    return None
github finos / greenkey-asrtoolkit / asrtoolkit / data_handlers / html.py View on Github external
def parse_line(line):
    """
    Parse a single row of an html file into a segment.

    :param line: object exposing findAll("td"); the cells found are expected
        to unpack into exactly three values (time range, speaker, text)
    :return: segment object if the row has cells and the segment validates;
        else None
    """
    cells = line.findAll("td")
    if not cells:
        return None

    # Only the first child of each cell is used.
    start_stop, speaker, text = [list(cell.children)[0] for cell in cells]

    # Drop the first and last characters (presumably enclosing brackets --
    # confirm against the html writer) and split the range on " - ".
    start, stop = start_stop[1:-1].split(" - ")

    candidate = segment({
        "speaker": speaker,
        "start": start,
        "stop": stop,
        "text": text
    })
    return candidate if candidate.validate() else None
github finos / greenkey-asrtoolkit / asrtoolkit / data_handlers / txt.py View on Github external
def read_in_memory(input_data):
    """
    Build one text-only segment per line of the input.

    :param input_data: str; text to split with splitlines()
    :return: list of segment objects, each created from a line with its
        leading and trailing whitespace stripped
    """
    return [
        segment({"text": raw_line.strip()})
        for raw_line in input_data.splitlines()
    ]
github finos / greenkey-asrtoolkit / asrtoolkit / data_handlers / json.py View on Github external
seg = None
    try:
        assign_if_present("channel")
        assign_if_present("startTimeSec", "start")
        assign_if_present("stopTimeSec", "stop")
        assign_if_present("endTimeSec", "stop")
        assign_if_present("transcript", "text")
        assign_if_present("corrected_transcript", "text")
        assign_if_present("formatted_transcript", "formatted_text")
        assign_if_present("punctuated_transcript", "formatted_text")
        assign_if_present("speakerInfo", "speaker", "ID")
        assign_if_present("genderInfo", "label", "gender",
                          lambda gender: "".format(gender))
        assign_if_present("confidence", "confidence")

        seg = segment(extracted_dict)

    except Exception as exc:
        LOGGER.exception(exc)

    return seg if seg and seg.validate() else None
github finos / greenkey-asrtoolkit / asrtoolkit / data_handlers / webvtt_common.py View on Github external
def read_caption(caption):
    """
    Convert a caption object into a segment.

    Reads start_in_seconds, end_in_seconds and text from the caption,
    removes everything matching non_transcript_marks from the stripped
    text, and strips the result again.

    :param caption: object providing start_in_seconds, end_in_seconds and text
    :return: segment object if it was built without error, is truthy and
        passes validate(); else None
    """
    try:
        fields = {
            "start": caption.start_in_seconds,
            "stop": caption.end_in_seconds,
            "text": re.sub(non_transcript_marks, "",
                           caption.text.strip()).strip(),
        }
        parsed = segment(fields)
    except Exception as exc:
        # Any failure while reading or building is logged, not raised.
        LOGGER.exception(exc)
        return None

    if parsed and parsed.validate():
        return parsed
    return None