How to use the ngram.LetterModels.Letters class in ngram

To help you get started, we’ve selected a few ngram examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Ezhil-Language-Foundation / open-tamil / ngram / LetterModels.py View on Github external
def __init__(self, filename):
    """Initialise the instance by delegating to ``Letters.__init__`` with ``filename``."""
    Letters.__init__(self, filename)
github Ezhil-Language-Foundation / open-tamil / ngram / LetterModels.py View on Github external
pass

    def __unicode__( self ):
        """Return the letter-frequency table as text.

        Every entry of ``self.letter`` becomes one newline-terminated line of
        the form ``<letter> => <count>``, in the mapping's iteration order.
        An empty mapping yields an empty string.
        """
        # Pure formatting only: the earlier debug ``print(max(...))`` wrote to
        # stdout on every conversion and raised ValueError for an empty table.
        return u"".join(
            u"%s => %d\n" % (lett, freq) for lett, freq in self.letter.items()
        )
    
    def update_file(self, filename):
        """Replace ``self.corpus`` with a new ``Corpus`` built from ``filename``."""
        new_corpus = Corpus(filename)
        self.corpus = new_corpus
        
    def save(self,filename):
        """Placeholder for writing the model to ``filename``.

        No implementation is provided here, so the call always fails.

        Raises:
            NotImplementedError: always. It is a subclass of ``Exception``,
                so callers that catch ``Exception`` keep working.
        """
        raise NotImplementedError('Not implemented')
    
class Unigram(Letters):
    """Single-letter frequency model built on top of ``Letters``."""

    def __init__(self, filename):
        """Delegate construction to ``Letters.__init__`` with ``filename``."""
        Letters.__init__(self, filename)

    def frequency_model(self):
        """Count each letter yielded by ``self.corpus.next_tamil_letter()``.

        Counts are accumulated in ``self.letter``; every yielded letter must
        already be a key of that mapping.
        """
        # the corpus hands out its letters one at a time through a generator
        for tamil_letter in self.corpus.next_tamil_letter():
            self.letter[tamil_letter] += 1

    def save(self, filename):
        """Write the non-zero counts to ``filename`` as UTF-8, highest first.

        Each output line has the form ``<letter> - <count>``.
        """
        with codecs.open(filename, "w", "utf-8") as fp:
            ranked = sorted(
                self.letter.items(),
                key=operator.itemgetter(1),
                reverse=True,
            )
            for letter, count in ranked:
                # letters that never occurred are left out of the file
                if count != 0:
                    fp.write(u"%s - %d\n" % (letter, count))