How to use the sacremoses.truecase.MosesTruecaser function in sacremoses

To help you get started, we’ve selected a few sacremoses examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alvations / sacremoses / sacremoses / cli.py View on Github external
def truecase_file(modelfile, processes, is_asr, encoding, quiet):
    """Truecase stdin line by line and write each result to stdout.

    :param modelfile: Passed to ``MosesTruecaser`` as ``load_from``.
    :param processes: Currently unused; lines are handled sequentially
        because the parallel job does not work properly for
        ``MosesTruecaser.truecase`` (see the FIXME below).
    :param is_asr: Forwarded to ``MosesTruecaser`` as ``is_asr``.
    :param encoding: Encoding given to the truecaser and to both the
        stdin and stdout text streams.
    :param quiet: When true, the input is iterated directly; otherwise it
        is wrapped in ``tqdm``.
    """
    moses = MosesTruecaser(load_from=modelfile, is_asr=is_asr, encoding=encoding)
    # Bind return_str=True once instead of repeating it on every call.
    moses_truecase = partial(moses.truecase, return_str=True)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        with click.get_text_stream("stdout", encoding=encoding) as fout:
            # Use a separate name so the stream managed by ``with`` is not
            # shadowed by the optional tqdm wrapper.
            lines = fin if quiet else tqdm(fin)
            for line in lines:
                # print() appends the newline itself (default end="\n").
                print(moses_truecase(line), file=fout)
            # FIXME: parallelizing this job doesn't work properly for
            # MosesTruecaser.truecase, so lines are processed one at a time.
github alvations / sacremoses / sacremoses / cli.py View on Github external
def truecase_file(modelfile, processes, is_asr, encoding, quiet):
    """Truecase stdin line by line and write each result to stdout.

    :param modelfile: Passed to ``MosesTruecaser`` as ``load_from``.
    :param processes: Currently unused; lines are handled sequentially
        because the parallel job does not work properly for
        ``MosesTruecaser.truecase`` (see the FIXME below).
    :param is_asr: Forwarded to ``MosesTruecaser`` as ``is_asr``.
    :param encoding: Encoding given to the truecaser and to both the
        stdin and stdout text streams.
    :param quiet: When true, the input is iterated directly; otherwise it
        is wrapped in ``tqdm``.
    """
    moses = MosesTruecaser(load_from=modelfile, is_asr=is_asr, encoding=encoding)
    # Bind return_str=True once instead of repeating it on every call.
    moses_truecase = partial(moses.truecase, return_str=True)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        with click.get_text_stream("stdout", encoding=encoding) as fout:
            # Use a separate name so the stream managed by ``with`` is not
            # shadowed by the optional tqdm wrapper.
            lines = fin if quiet else tqdm(fin)
            for line in lines:
                # print() appends the newline itself (default end="\n").
                print(moses_truecase(line), file=fout)
            # FIXME: parallelizing this job doesn't work properly for
            # MosesTruecaser.truecase, so lines are processed one at a time.
github alvations / sacremoses / sacremoses / cli.py View on Github external
def train_truecaser(modelfile, processes, is_asr, possibly_use_first_token, encoding, quiet):
    """Train a truecaser on text read from stdin and save it.

    :param modelfile: Passed to ``MosesTruecaser.save_model`` once training
        has finished.
    :param processes: Forwarded to ``train_from_file_object`` as ``processes``.
    :param is_asr: Forwarded to ``MosesTruecaser`` as ``is_asr``.
    :param possibly_use_first_token: Forwarded unchanged to
        ``train_from_file_object``.
    :param encoding: Encoding given to the truecaser and to the stdin
        text stream.
    :param quiet: When true, training runs with ``progress_bar=False``.
    """
    moses = MosesTruecaser(is_asr=is_asr, encoding=encoding)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        # The return value is not needed here: saving goes through the
        # truecaser object itself, so no local binding is kept.
        moses.train_from_file_object(
            fin,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
            progress_bar=(not quiet),
        )
        moses.save_model(modelfile)
github alvations / sacremoses / sacremoses / cli.py View on Github external
def train_truecaser(modelfile, processes, is_asr, possibly_use_first_token, encoding, quiet):
    """Train a truecaser on text read from stdin and save it.

    :param modelfile: Passed to ``MosesTruecaser.save_model`` once training
        has finished.
    :param processes: Forwarded to ``train_from_file_object`` as ``processes``.
    :param is_asr: Forwarded to ``MosesTruecaser`` as ``is_asr``.
    :param possibly_use_first_token: Forwarded unchanged to
        ``train_from_file_object``.
    :param encoding: Encoding given to the truecaser and to the stdin
        text stream.
    :param quiet: When true, training runs with ``progress_bar=False``.
    """
    moses = MosesTruecaser(is_asr=is_asr, encoding=encoding)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        # The return value is not needed here: saving goes through the
        # truecaser object itself, so no local binding is kept.
        moses.train_from_file_object(
            fin,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
            progress_bar=(not quiet),
        )
        moses.save_model(modelfile)
github alvations / sacremoses / sacremoses / truecase.py View on Github external
def __init__(self, load_from=None, is_asr=None, encoding="utf8"):
        """
        :param load_from:
        :type load_from:

        :param is_asr: A flag to indicate that model is for ASR. ASR input has
            no case, make sure it is lowercase, and make sure known are cased
            eg. 'i' to be uppercased even if i is known.
        :type is_asr: bool
        """
        # Initialize the object.
        super(MosesTruecaser, self).__init__()
        # Initialize the language specific nonbreaking prefixes.
        self.SKIP_LETTERS_REGEX = re.compile(
            u"[{}{}{}]".format(
                self.Lowercase_Letter, self.Uppercase_Letter, self.Titlecase_Letter
            )
        )

        self.XML_SPLIT_REGX = re.compile("(<.*(?<=>))(.*)((?=]*>)")

        self.SENT_END = {".", ":", "?", "!"}
        self.DELAYED_SENT_START = {
            "(",
            "[",
            '"',
            "'",
            "'",