How to use the ngram.knlm.NGram function in ngram

To help you get started, we’ve selected a few ngram examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github shuyo / iir / ngram / knlm.py View on Github external
parser = optparse.OptionParser()
    parser.add_option("-n", dest="ngram", type="int", help="n-gram", default=7)
    parser.add_option("-d", dest="discount", type="float", help="discount parameter of Knerser-Ney", default=0.5)
    parser.add_option("-i", dest="numgen", type="int", help="number of texts to generate", default=100)
    parser.add_option("-e", dest="encode", help="character code of input file(s)", default='utf-8')
    parser.add_option("-o", dest="output", help="output filename", default="generated.txt")
    parser.add_option("--seed", dest="seed", type="int", help="random seed")
    (opt, args) = parser.parse_args()

    numpy.random.seed(opt.seed)

    START = u"\u0001"
    END = u"\u0002"

    ngram = NGram(opt.ngram)
    gen = Generator(ngram)
    for filename in args:
        with codecs.open(filename, "rb", opt.encode) as f:
            for s in f:
                s = s.strip()
                if len(s) == 0: continue
                s = START + s + END
                gen.start()
                for c in s:
                    gen.inc(c)

    D = opt.discount
    with codecs.open(opt.output, "wb", "utf-8") as f:
        for n in xrange(opt.numgen):
            st = START
            for i in xrange(1000):
github shuyo / iir / ngram / knlm.py View on Github external
def inc(self, v):
        """Count one occurrence of *v* under this node and return its child.

        When this node's ``depth`` is greater than ``N`` nothing is recorded
        and ``None`` is returned. Otherwise the child node stored under key
        *v* is created if it is missing (as ``NGram(self.N, self.depth + 1)``),
        its ``freq`` is incremented by one, and the child is returned.
        """
        # Guard clause: nodes deeper than N do not track children.
        if self.depth > self.N:
            return None
        if v not in self:
            self[v] = NGram(self.N, self.depth + 1)
        child = self[v]
        child.freq += 1
        return child
    def dump(self):