How to use the sacremoses.truecase.MosesDetruecaser function in sacremoses

To help you get started, we’ve selected a few sacremoses examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: alvations / sacremoses / sacremoses / cli.py (View on GitHub, external link)
def detruecase_file(processes, is_headline, encoding, quiet):
    """Detruecase every line read from stdin and write the results to stdout.

    Args:
        processes: Number of worker processes. ``1`` selects the plain
            sequential loop; any other value is handed to
            ``parallelize_preprocess``.
        is_headline: Forwarded unchanged to ``MosesDetruecaser.detruecase``.
        encoding: Text encoding used for both the stdin and stdout streams.
        quiet: When true, no progress bar is displayed in either mode.
    """
    moses = MosesDetruecaser()
    moses_detruecase = partial(
        moses.detruecase, return_str=True, is_headline=is_headline
    )
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        with click.get_text_stream("stdout", encoding=encoding) as fout:
            # With a single process, joblib parallelization is slower,
            # so just process line by line normally.
            if processes == 1:
                # Honour `quiet` here as well: the parallel branch already
                # suppresses its progress bar, the sequential one did not.
                for line in tqdm(fin.readlines(), disable=quiet):
                    print(moses_detruecase(line), end="\n", file=fout)
            else:
                for outline in parallelize_preprocess(
                    moses_detruecase, fin.readlines(), processes, progress_bar=(not quiet)
                ):
                    print(outline, end="\n", file=fout)
GitHub: alvations / sacremoses / sacremoses / cli.py (View on GitHub, external link)
def detruecase_file(processes, is_headline, encoding, quiet):
    """Read lines from stdin, detruecase each one, and print it to stdout.

    Args:
        processes: Worker count. A value of ``1`` processes the lines in a
            simple loop; otherwise the work goes through
            ``parallelize_preprocess`` with this many processes.
        is_headline: Passed through to ``MosesDetruecaser.detruecase``.
        encoding: Encoding applied to the stdin and stdout text streams.
        quiet: Suppresses the progress bar when true.
    """
    moses = MosesDetruecaser()
    moses_detruecase = partial(
        moses.detruecase, return_str=True, is_headline=is_headline
    )
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        with click.get_text_stream("stdout", encoding=encoding) as fout:
            # If it's a single process, joblib parallelization is slower,
            # so just process line by line normally.
            if processes == 1:
                # `disable=quiet` keeps the sequential path consistent with
                # the parallel path, which only shows a bar when not quiet.
                for line in tqdm(fin.readlines(), disable=quiet):
                    print(moses_detruecase(line), end="\n", file=fout)
            else:
                for outline in parallelize_preprocess(
                    moses_detruecase, fin.readlines(), processes, progress_bar=(not quiet)
                ):
                    print(outline, end="\n", file=fout)
GitHub: alvations / sacremoses / sacremoses / truecase.py (View on GitHub, external link)
def __init__(self):
        # Initialize the object.
        super(MosesDetruecaser, self).__init__()
        self.SENT_END = {".", ":", "?", "!"}
        self.DELAYED_SENT_START = {
            "(",
            "[",
            '"',
            "'",
            "'",
            """,
            "[",
            "]",
        }

        # Some predefined words that will always be in lowercase.
        self.ALWAYS_LOWER = {
            "a",
            "after",