How to use the langdetect.utils.ngram.NGram class in langdetect

To help you get started, we’ve selected a few langdetect examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Mimino666 / langdetect / langdetect / detector.py View on Github external
def _extract_ngrams(self):
        '''Return the n-grams of the target text that the detector knows.

        Every character of ``self.text`` is pushed into a fresh ``NGram``.
        After each push, unless ``capitalword`` is set on it, the trailing
        1..``NGram.N_GRAM`` characters of its ``grams`` buffer are looked up
        in ``self.word_lang_prob_map``; hits are appended to the returned
        list in the order they are encountered.
        '''
        sizes = list(xrange(1, NGram.N_GRAM + 1))

        found = []
        window = NGram()
        for char in self.text:
            window.add_char(char)
            if not window.capitalword:
                for size in sizes:
                    # optimized form of: piece = window.get(size)
                    buffered = window.grams
                    if len(buffered) < size:
                        # Larger sizes cannot fit either.
                        break
                    piece = buffered[-size:]
                    if not piece or piece == ' ':
                        continue
                    if piece in self.word_lang_prob_map:
                        found.append(piece)
        return found
github Mimino666 / langdetect / langdetect / utils / lang_profile.py View on Github external
def add(self, gram):
        '''Count one occurrence of ``gram`` in this profile.

        Nothing is recorded when the profile has no name, when ``gram`` is
        None, or when its length lies outside 1..``NGram.N_GRAM``.
        Otherwise the per-length counter ``n_words[len(gram) - 1]`` and the
        per-gram counter ``freq[gram]`` are each incremented by one.
        '''
        if self.name is None:  # Illegal
            return
        if gram is None:  # Illegal
            return
        size = len(gram)
        if not 1 <= size <= NGram.N_GRAM:  # Illegal
            return
        self.n_words[size - 1] += 1
        self.freq[gram] += 1
github Mimino666 / langdetect / langdetect / detector.py View on Github external
def _extract_ngrams(self):
        '''Return the n-grams of the target text that the detector knows.

        Every character of ``self.text`` is pushed into a fresh ``NGram``.
        After each push, unless ``capitalword`` is set on it, the trailing
        1..``NGram.N_GRAM`` characters of its ``grams`` buffer are looked up
        in ``self.word_lang_prob_map``; hits are appended to the returned
        list in the order they are encountered.
        '''
        sizes = list(xrange(1, NGram.N_GRAM + 1))

        found = []
        window = NGram()
        for char in self.text:
            window.add_char(char)
            if not window.capitalword:
                for size in sizes:
                    # optimized form of: piece = window.get(size)
                    buffered = window.grams
                    if len(buffered) < size:
                        # Larger sizes cannot fit either.
                        break
                    piece = buffered[-size:]
                    if not piece or piece == ' ':
                        continue
                    if piece in self.word_lang_prob_map:
                        found.append(piece)
        return found
github Mimino666 / langdetect / langdetect / utils / lang_profile.py View on Github external
def update(self, text):
        '''Fold a (possibly fragmentary) piece of text into the profile.

        The text is first passed through ``NGram.normalize_vi``. Each of its
        characters is then pushed into a fresh ``NGram``, and after every
        push the grams of size 1..``NGram.N_GRAM`` obtained from it are
        handed to ``self.add``. A ``text`` of None is ignored.
        '''
        if text is None:
            return
        normalized = NGram.normalize_vi(text)
        window = NGram()
        for char in normalized:
            window.add_char(char)
            for offset in xrange(NGram.N_GRAM):
                piece = window.get(offset + 1)
                self.add(piece)
github Mimino666 / langdetect / langdetect / utils / ngram.py View on Github external
messages.get_string('NGram.KANJI_7_29'),
        messages.get_string('NGram.KANJI_7_32'),
        messages.get_string('NGram.KANJI_7_33'),
        messages.get_string('NGram.KANJI_7_35'),
        messages.get_string('NGram.KANJI_7_37')]

    # Filled in by _init_cjk_map(): maps every character of each CJK_CLASS
    # entry to that entry's first character.
    CJK_MAP = {}

    @classmethod
    def _init_cjk_map(cls):
        '''Populate ``CJK_MAP`` from ``CJK_CLASS``.

        For each entry of ``CJK_CLASS``, every character in the entry is
        mapped to the entry's first character.
        '''
        for group in cls.CJK_CLASS:
            head = group[0]
            cls.CJK_MAP.update(dict.fromkeys(group, head))

# Fill NGram.CJK_MAP right after the class has been defined.
NGram._init_cjk_map()