Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def append(self, text):
    '''Append the target text for language detection.

    Every match of self.URL_RE and self.MAIL_RE in text is replaced with a
    single space, and the result is passed through NGram.normalize_vi.
    Only the first self.max_text_length characters of that cleaned text
    are examined in each call; the rest is cut down. (The limit is
    presumably the one configured through
    Detector.set_max_text_length(int) -- confirm against the class.)

    Within the examined part, a space that directly follows another space
    is dropped, so runs of spaces collapse to one. The surviving
    characters are added to the end of self.text.
    '''
    text = self.URL_RE.sub(' ', text)
    text = self.MAIL_RE.sub(' ', text)
    text = NGram.normalize_vi(text)

    # Clamp at zero so a negative limit keeps nothing, rather than being
    # interpreted as a slice offset counted from the end of the text.
    limit = max(self.max_text_length, 0)

    kept = []
    # The previous character is tracked per call only: a leading space is
    # kept even if self.text already ends with one.
    previous = None
    for ch in text[:limit]:
        if ch != ' ' or previous != ' ':
            kept.append(ch)
        previous = ch

    # One concatenation instead of growing self.text a character at a time.
    if kept:
        self.text += ''.join(kept)
def update(self, text):
    '''Update the language profile with (fragmented) text.

    Does nothing when text is None. Otherwise the text is passed through
    NGram.normalize_vi and fed one character at a time into a fresh NGram
    instance. After each character, the values returned by get(n) for
    every n from 1 to NGram.N_GRAM are handed to self.add, which is how
    the extracted n-grams end up counted in the profile.
    '''
    if text is None:
        return

    normalized = NGram.normalize_vi(text)
    window = NGram()
    for char in normalized:
        window.add_char(char)
        # Register every n-gram size that the window can currently supply.
        for size in range(1, NGram.N_GRAM + 1):
            self.add(window.get(size))