How to use the asrtoolkit.data_structures.formatting.clean_float function in asrtoolkit

To help you get started, we’ve selected a few asrtoolkit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github finos / greenkey-asrtoolkit / asrtoolkit / data_structures / segment.py View on Github external
Class for holding segment-specific information
    Segment objects correspond to the dict under the key 'segment'
    in the ASR generated transcript (lattice)
    - the fields included below are shared across 'segments'
      but 'segments' may contain many other fields (i.e. sentiment) depending
      on the text processing pipeline selected.
    """

    # refer to some file if possible
    filename = "unknown"
    # by default, use channel 1
    channel = "1"
    # need a speaker id
    speaker = "UnknownSpeaker"
    # start at beginning of file
    start = clean_float(0.0)
    # this should go the length of the file or the segment
    stop = clean_float(0.0)

    # Arbitrarily choose a default gender since
    # unknown does not play well with some programs
    # which digest ASR output
    label = ""
    # text to be populated from read class
    text = ""
    # text for printing out to fancy output formats
    formatted_text = ""
    # confidence in accuracy of text
    confidence = 1.0

    def __init__(self, *args, **kwargs):
        """
github finos / greenkey-asrtoolkit / asrtoolkit / data_structures / segment.py View on Github external
def validate(self):
        """
        Check this segment against common failure cases.

        The segment is treated as invalid when any of the following holds:
          - self.speaker equals "inter_segment_gap"
          - self.text is empty or equals "ignore_time_segment_in_scoring"
          - self.label is not one of the values in the accepted list
          - clean_float raises for self.start or self.stop

        Side effects:
          - self.start and self.stop are replaced by their clean_float results
          - an invalid segment is reported through LOGGER.error together with
            a JSON dump of the instance attributes
          - hyphens in self.filename are replaced with underscores and a
            notice is printed

        NOTE(review): no return statement is visible in this excerpt, so
        `valid` appears unused after logging -- confirm against the full
        source whether the result is returned to the caller.
        """
        # Field checks; because this is an `and` chain, an empty self.text
        # makes the whole expression falsy.
        valid = (self.speaker != "inter_segment_gap" and self.text
                 and self.text != "ignore_time_segment_in_scoring"
                 and self.label in ["", ""])

        # Pass start/stop through clean_float; any exception raised there
        # marks the segment invalid and is printed rather than re-raised.
        try:
            self.start = clean_float(self.start)
            self.stop = clean_float(self.stop)
        except Exception as exc:
            valid = False
            print(exc)

        # Report the rejected segment, including all of its instance attributes.
        if not valid:
            LOGGER.error(
                """Skipping segment due to validation error.
Please note that this invalidates WER calculations based on the entire file.
Segment: %s""",
                json.dumps(self.__dict__),
            )

        # Hyphens in the filename are rewritten to underscores regardless of
        # whether the segment passed validation.
        if "-" in self.filename:
            self.filename = self.filename.replace("-", "_")
            print(
                "Please rename audio file to replace hyphens with underscores")
github finos / greenkey-asrtoolkit / asrtoolkit / data_structures / segment.py View on Github external
in the ASR generated transcript (lattice)
    - the fields included below are shared across 'segments'
      but 'segments' may contain many other fields (i.e. sentiment) depending
      on the text processing pipeline selected.
    """

    # refer to some file if possible
    filename = "unknown"
    # by default, use channel 1
    channel = "1"
    # need a speaker id
    speaker = "UnknownSpeaker"
    # start at beginning of file
    start = clean_float(0.0)
    # this should go the length of the file or the segment
    stop = clean_float(0.0)

    # Arbitrarily choose a default gender since
    # unknown does not play well with some programs
    # which digest ASR output
    label = ""
    # text to be populated from read class
    text = ""
    # text for printing out to fancy output formats
    formatted_text = ""
    # confidence in accuracy of text
    confidence = 1.0

    def __init__(self, *args, **kwargs):
        """
        Stores and initializes filename, channel, speaker, start & stop times,
        label, and formatted and unformatted text fields.
github finos / greenkey-asrtoolkit / asrtoolkit / data_structures / segment.py View on Github external
def validate(self):
        """
        Checks common failure cases to determine whether a line is valid
        """
        valid = (self.speaker != "inter_segment_gap" and self.text
                 and self.text != "ignore_time_segment_in_scoring"
                 and self.label in ["", ""])

        try:
            self.start = clean_float(self.start)
            self.stop = clean_float(self.stop)
        except Exception as exc:
            valid = False
            print(exc)

        if not valid:
            LOGGER.error(
                """Skipping segment due to validation error.
Please note that this invalidates WER calculations based on the entire file.
Segment: %s""",
                json.dumps(self.__dict__),
            )

        if "-" in self.filename:
            self.filename = self.filename.replace("-", "_")
            print(