How to use the langdetect.DetectorFactory class in langdetect

To help you get started, we’ve selected a few langdetect examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Lab41 / altair / altair / util / determine_spoken_language.py View on Github external
def determine_spoken_language(comments):
    """Detect the spoken language of comments extracted from source code.

    Args:
        comments (str): Comment text to analyse.

    Returns:
        str: The language code returned by ``detect`` (an ISO 639-1 style
        code, e.g. ``'en'``), or ``"unknown"`` if detection raised.
    """
    # DetectorFactory seed forces deterministic results on language assessment
    DetectorFactory.seed = 0
    language = "unknown"

    try:
        # Attempt language detection
        language = detect(comments)
    except Exception as e:
        # A LangDetectException means there was not enough information to
        # detect the language; that case is expected, so "unknown" is
        # returned silently. It is matched by class name because the
        # exception class itself is not referenced in this block.
        if e.__class__.__name__ != 'LangDetectException':
            # Log unexpected errors. The first argument must be a format
            # string; passing the pieces positionally without placeholders
            # makes the logging module fail to format the record.
            logger.info("%s - %s", e.__class__.__name__, e)

    return language
github BLKSerene / Wordless / src / wordless_utils / wordless_detection.py View on Github external
import re

from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *

import chardet
import cchardet
import langdetect
import langid

from wordless_text import wordless_matching
from wordless_utils import wordless_conversion, wordless_misc

# Force consistent results for language detection: the seed is set on the
# DetectorFactory class itself, once, when this module is imported.
langdetect.DetectorFactory.seed = 0

def detect_encoding(main, file_path):
    text = b''
    success = True

    with open(file_path, 'rb') as f:
        if main.settings_custom['auto_detection']['detection_settings']['number_lines_no_limit']:
            for line in f:
                text += line
        else:
            for i, line in enumerate(f):
                if i < main.settings_custom['auto_detection']['detection_settings']['number_lines']:
                    text += line
                else:
                    break
github SmokinCaterpillar / TrufflePig / trufflepig / filters / stylemeasures.py View on Github external
def __init__(self, max_length=5000, seed=42):
        """Build a seeded langdetect factory with its profiles loaded.

        ``max_length`` is stored unchanged on the instance; ``seed`` is
        handed to the factory's ``set_seed``.
        """
        detector_factory = langdetect.DetectorFactory()
        detector_factory.set_seed(seed)
        detector_factory.load_profile(langdetect.PROFILES_DIRECTORY)

        self.factory = detector_factory
        self.max_length = max_length