Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _detect_block(self):
    """Run the randomized trials and store averaged scores in ``self.langprob``.

    The text is cleaned and split into n-grams first. Each of the
    ``self.n_trial`` trials starts from ``self._init_probability()``, draws
    a smoothing value around ``self.alpha`` and repeatedly updates the
    trial's probabilities with a randomly chosen n-gram until the
    convergence test passes or the iteration cap is reached.

    Raises:
        LangDetectException: with ``ErrorCode.CantDetectError`` when no
            n-grams could be extracted from the text.
    """
    self.cleaning_text()
    ngrams = self._extract_ngrams()
    if not ngrams:
        raise LangDetectException(ErrorCode.CantDetectError, 'No features in text.')

    self.langprob = [0.0] * len(self.langlist)

    # Re-seed on every call so the random draws below start from self.seed.
    self.random.seed(self.seed)
    for _trial in xrange(self.n_trial):
        prob = self._init_probability()
        # Perturb the smoothing parameter with Gaussian noise for this trial.
        alpha = self.alpha + self.random.gauss(0.0, 1.0) * self.ALPHA_WIDTH

        i = 0
        while True:
            self._update_lang_prob(prob, self.random.choice(ngrams), alpha)
            # Only every fifth iteration is checked for termination.
            if i % 5 == 0:
                if self._normalize_prob(prob) > self.CONV_THRESHOLD or i >= self.ITERATION_LIMIT:
                    break
                if self.verbose:
                    six.print_('>', self._sort_probability(prob))
            # Advance the counter; without this the ITERATION_LIMIT guard
            # above could never trigger and the loop would be unbounded.
            i += 1

        # Fold this trial into the running average.
        # NOTE(review): assumes prob has one entry per self.langlist item,
        # matching the length of self.langprob - confirm against
        # _init_probability.
        for j in xrange(len(self.langprob)):
            self.langprob[j] += prob[j] / self.n_trial
def add_profile(self, profile, index, langsize):
    """Register one language profile in the word/language probability table.

    Args:
        profile: object exposing ``name``, a ``freq`` mapping of n-gram to
            count, and ``n_words`` indexed by n-gram length minus one.
        index: column of this language in each per-word probability row.
        langsize: length of a newly created per-word probability row.

    Raises:
        LangDetectException: with ``ErrorCode.DuplicateLangError`` when
            ``profile.name`` is already present in ``self.langlist``.
    """
    language = profile.name
    if language in self.langlist:
        raise LangDetectException(ErrorCode.DuplicateLangError, 'Duplicate the same language profile.')
    self.langlist.append(language)

    table = self.word_lang_prob_map
    for ngram, count in profile.freq.items():
        # Every n-gram gets a row, even if its length is out of range below.
        row = table.setdefault(ngram, [0.0] * langsize)
        size = len(ngram)
        if size < 1 or size > 3:
            continue
        # Relative frequency among n-grams of the same length; the 1.0
        # factor forces float division.
        row[index] = 1.0 * count / profile.n_words[size - 1]
def load_profile(self, profile_directory):
list_files = os.listdir(profile_directory)
if not list_files:
raise LangDetectException(ErrorCode.NeedLoadProfileError, 'Not found profile: ' + profile_directory)
langsize, index = len(list_files), 0
for filename in list_files:
if filename.startswith('.'):
continue
filename = path.join(profile_directory, filename)
if not path.isfile(filename):
continue
f = None
try:
if sys.version_info[0] < 3:
f = open(filename, 'r')
else:
f = open(filename, 'r', encoding='utf-8')
json_data = json.load(f)