How to use the smt.utils.utility.matrix function in smt

To help you get started, we’ve selected a few examples of `smt.utils.utility.matrix`, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kenkov / smt / smt / ibmmodel / ibmmodel2.py View on Github external
("私 は 女 です", "I am a girl"),
                     ("私 は 先生 です", "I am a teacher"),
                     ("彼女 は 先生 です", "She is a teacher"),
                     ("彼 は 先生 です", "He is a teacher"),
                     ]
    >>> t, a = train(sentences, loop_count=1000)
    >>> args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a)
    |x| | | |
    | | |x| |
    | | | |x|
    | | |x| |
    '''
    max_a = viterbi_alignment(es, fs, t, a).items()
    m = len(es)
    n = len(fs)
    return utility.matrix(m, n, max_a, es, fs)
github kenkov / smt / smt / phrase / word_alignment.py View on Github external
return alignment(es, fs, e2f, f2e)


if __name__ == '__main__':
    # Demo executed only when the module is run as a script: prints the
    # result of `matrix` for three alignments of a fixed sentence pair, then
    # pretty-prints and renders a symmetrized alignment from a toy corpus.
    # test for alignment
    # Whitespace-tokenized English (9 tokens) / German (10 tokens) pair.
    es = "michael assumes that he will stay in the house".split()
    fs = "michael geht davon aus , dass er im haus bleibt".split()
    # Hand-written alignment pairs. First components stay within
    # 1..len(es) and second components within 1..len(fs), which suggests
    # 1-based (es position, fs position) pairs -- TODO confirm against
    # `matrix` / `_alignment`.
    e2f = [(1, 1), (2, 2), (2, 3), (2, 4), (3, 6),
           (4, 7), (7, 8), (9, 9), (6, 10)]
    f2e = [(1, 1), (2, 2), (3, 6), (4, 7), (7, 8),
           (8, 8), (9, 9), (5, 10), (6, 10)]
    from smt.utils.utility import matrix
    # Print each input alignment first so it can be compared with the
    # combined one printed below.
    print(matrix(len(es), len(fs), e2f, es, fs))
    print(matrix(len(es), len(fs), f2e, es, fs))
    # `_alignment` is defined elsewhere in this module; presumably it
    # merges the two alignment lists -- verify against its definition.
    ali = _alignment(es, fs, e2f, f2e)
    print(matrix(len(es), len(fs), ali, es, fs))

    # test for symmetrization
    from smt.utils.utility import mkcorpus
    # Five (Japanese, English) sentence pairs used as a toy corpus.
    sentenses = [("僕 は 男 です", "I am a man"),
                 ("私 は 女 です", "I am a girl"),
                 ("私 は 先生 です", "I am a teacher"),
                 ("彼女 は 先生 です", "She is a teacher"),
                 ("彼 は 先生 です", "He is a teacher"),
                 ]
    corpus = mkcorpus(sentenses)
    # NOTE: `es` / `fs` are rebound here; `es` now holds the Japanese tokens
    # and `fs` the English tokens of one sentence pair from the corpus.
    es = "私 は 先生 です".split()
    fs = "I am a teacher".split()
    syn = symmetrization(es, fs, corpus)
    # `pprint` is not imported inside this block; it must already be in
    # scope at module level.
    pprint(syn)
    print(matrix(len(es), len(fs), syn, es, fs))
github kenkov / smt / smt / phrase / word_alignment.py View on Github external
e2f_train = ibmmodel2._train(e2f_corpus, loop_count=10)
    e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items()

    return alignment(es, fs, e2f, f2e)


if __name__ == '__main__':
    # Demo executed only when the module is run as a script: prints the
    # result of `matrix` for three alignments of a fixed sentence pair, then
    # computes a symmetrized alignment from a toy corpus.
    # test for alignment
    # Whitespace-tokenized English (9 tokens) / German (10 tokens) pair.
    es = "michael assumes that he will stay in the house".split()
    fs = "michael geht davon aus , dass er im haus bleibt".split()
    # Hand-written alignment pairs. First components stay within
    # 1..len(es) and second components within 1..len(fs), which suggests
    # 1-based (es position, fs position) pairs -- TODO confirm against
    # `matrix` / `_alignment`.
    e2f = [(1, 1), (2, 2), (2, 3), (2, 4), (3, 6),
           (4, 7), (7, 8), (9, 9), (6, 10)]
    f2e = [(1, 1), (2, 2), (3, 6), (4, 7), (7, 8),
           (8, 8), (9, 9), (5, 10), (6, 10)]
    from smt.utils.utility import matrix
    # Print each input alignment first so it can be compared with the
    # combined one printed below.
    print(matrix(len(es), len(fs), e2f, es, fs))
    print(matrix(len(es), len(fs), f2e, es, fs))
    # `_alignment` is defined elsewhere in this module; presumably it
    # merges the two alignment lists -- verify against its definition.
    ali = _alignment(es, fs, e2f, f2e)
    print(matrix(len(es), len(fs), ali, es, fs))

    # test for symmetrization
    from smt.utils.utility import mkcorpus
    # Five (Japanese, English) sentence pairs used as a toy corpus.
    sentenses = [("僕 は 男 です", "I am a man"),
                 ("私 は 女 です", "I am a girl"),
                 ("私 は 先生 です", "I am a teacher"),
                 ("彼女 は 先生 です", "She is a teacher"),
                 ("彼 は 先生 です", "He is a teacher"),
                 ]
    corpus = mkcorpus(sentenses)
    # NOTE: `es` / `fs` are rebound here; `es` now holds the Japanese tokens
    # and `fs` the English tokens of one sentence pair from the corpus.
    es = "私 は 先生 です".split()
    fs = "I am a teacher".split()
    syn = symmetrization(es, fs, corpus)
github kenkov / smt / smt / phrase / word_alignment.py View on Github external
e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items()

    return alignment(es, fs, e2f, f2e)


if __name__ == '__main__':
    # Demo executed only when the module is run as a script: prints the
    # result of `matrix` for three alignments of a fixed sentence pair, then
    # pretty-prints a symmetrized alignment computed from a toy corpus.
    # test for alignment
    # Whitespace-tokenized English (9 tokens) / German (10 tokens) pair.
    es = "michael assumes that he will stay in the house".split()
    fs = "michael geht davon aus , dass er im haus bleibt".split()
    # Hand-written alignment pairs. First components stay within
    # 1..len(es) and second components within 1..len(fs), which suggests
    # 1-based (es position, fs position) pairs -- TODO confirm against
    # `matrix` / `_alignment`.
    e2f = [(1, 1), (2, 2), (2, 3), (2, 4), (3, 6),
           (4, 7), (7, 8), (9, 9), (6, 10)]
    f2e = [(1, 1), (2, 2), (3, 6), (4, 7), (7, 8),
           (8, 8), (9, 9), (5, 10), (6, 10)]
    from smt.utils.utility import matrix
    # Print each input alignment first so it can be compared with the
    # combined one printed below.
    print(matrix(len(es), len(fs), e2f, es, fs))
    print(matrix(len(es), len(fs), f2e, es, fs))
    # `_alignment` is defined elsewhere in this module; presumably it
    # merges the two alignment lists -- verify against its definition.
    ali = _alignment(es, fs, e2f, f2e)
    print(matrix(len(es), len(fs), ali, es, fs))

    # test for symmetrization
    from smt.utils.utility import mkcorpus
    # Five (Japanese, English) sentence pairs used as a toy corpus.
    sentenses = [("僕 は 男 です", "I am a man"),
                 ("私 は 女 です", "I am a girl"),
                 ("私 は 先生 です", "I am a teacher"),
                 ("彼女 は 先生 です", "She is a teacher"),
                 ("彼 は 先生 です", "He is a teacher"),
                 ]
    corpus = mkcorpus(sentenses)
    # NOTE: `es` / `fs` are rebound here; `es` now holds the Japanese tokens
    # and `fs` the English tokens of one sentence pair from the corpus.
    es = "私 は 先生 です".split()
    fs = "I am a teacher".split()
    syn = symmetrization(es, fs, corpus)
    # `pprint` is not imported inside this block; it must already be in
    # scope at module level.
    pprint(syn)
github kenkov / smt / smt / phrase / word_alignment.py View on Github external
print(matrix(len(es), len(fs), ali, es, fs))

    # test for symmetrization
    from smt.utils.utility import mkcorpus
    sentenses = [("僕 は 男 です", "I am a man"),
                 ("私 は 女 です", "I am a girl"),
                 ("私 は 先生 です", "I am a teacher"),
                 ("彼女 は 先生 です", "She is a teacher"),
                 ("彼 は 先生 です", "He is a teacher"),
                 ]
    corpus = mkcorpus(sentenses)
    es = "私 は 先生 です".split()
    fs = "I am a teacher".split()
    syn = symmetrization(es, fs, corpus)
    pprint(syn)
    print(matrix(len(es), len(fs), syn, es, fs))