How to use the konlpy.tag.Kkma class in konlpy

To help you get started, we’ve selected a few konlpy examples based on popular ways it is used in public projects.
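
Before the project examples, here is a minimal sketch of the Kkma API itself (the sample sentence is arbitrary; pos, morphs, and nouns are konlpy's documented tagger methods):

from konlpy.tag import Kkma

kkma = Kkma()  # construction starts a JVM, so it is relatively slow

text = "안녕하세요. 코엔엘파이 테스트입니다."
print(kkma.pos(text))     # (morpheme, POS tag) pairs, e.g. ('안녕', 'NNG')
print(kkma.morphs(text))  # morphemes only
print(kkma.nouns(text))   # nouns only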

github konlpy / konlpy / test / test_kkma.py
def kkma_instance():
    from konlpy.tag import Kkma
    k = Kkma()
    return k
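In konlpy's test suite this helper is used as a pytest fixture, so the slow, JVM-backed Kkma instance can be shared across tests. A hedged sketch of consuming it that way (the decorator scope and the test below are illustrative, not from the repo):

import pytest
from konlpy.tag import Kkma

@pytest.fixture(scope='session')
def kkma_instance():
    return Kkma()

def test_pos_returns_pairs(kkma_instance):
    # every item from pos() should be a (morpheme, tag) pair
    assert all(len(pair) == 2 for pair in kkma_instance.pos('저는 학생입니다.'))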
github TensorMSA / tensormsa / cluster / common / common_node.py
def _kkma_parse(self, str_arr, tag_combine=True):
        """
        POS-tag each string in str_arr with Kkma and flatten the results
        into a single token list.

        :param str_arr: iterable of input strings to analyze
        :param tag_combine: passed through to self._flat; when True,
            morphemes are kept together with their POS tags
        :return: flat list of tagged tokens
        """
        kkma = Kkma()  # from konlpy.tag import Kkma at module level
        return_arr = []
        for data in str_arr:
            return_arr = return_arr + self._flat(kkma.pos(str(data)), tag_combine=tag_combine)
        return return_arr
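The _flat helper is not shown in the snippet; a plausible stand-in, assuming tag_combine means joining each morpheme with its POS tag, would be:

from konlpy.tag import Kkma

def _flat(pos_pairs, tag_combine=True):
    # hypothetical reconstruction of the missing helper
    if tag_combine:
        return ['{}/{}'.format(word, tag) for word, tag in pos_pairs]
    return [word for word, _ in pos_pairs]

kkma = Kkma()
print(_flat(kkma.pos('형태소 분석')))  # e.g. ['형태소/NNG', '분석/NNG']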
github Kyubyong / word2word / make.py
def load_tokenizer(lang):
    if lang=="en":
        from nltk.tokenize import word_tokenize as wt
        tokenizer = wt
    elif lang=="ko":
        from konlpy.tag import Kkma
        tokenizer = Kkma()
    elif lang=="ja":
        import Mykytea
        opt="-model jp-0.4.7-1.mod"
        tokenizer = Mykytea.Mykytea(opt)
    elif lang=="zh_cn":
        import Mykytea
        opt = "-model ctb-0.4.0-1.mod"
        tokenizer = Mykytea.Mykytea(opt)
    elif lang=="zh_tw":
        import jieba
        tokenizer = jieba
    elif lang=="vi":
        from pyvi import ViTokenizer
        tokenizer = ViTokenizer
    elif lang=="th":
        from pythainlp.tokenize import word_tokenize
        tokenizer = word_tokenize
    return tokenizer
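Note that the returned objects are heterogeneous: a plain callable for English, a Kkma instance for Korean, a Mykytea object for Japanese and Chinese, and so on, so the caller has to dispatch per language. A hedged usage sketch for the Korean path:

tokenizer = load_tokenizer('ko')
print(tokenizer.morphs('오늘 날씨가 좋다'))  # Kkma tokenizes via its .morphs() method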
github miso-belica / sumy / sumy / nlp / tokenizers.py
def tokenize(self, text):
        try:
            from konlpy.tag import Kkma
        except ImportError as e:
            raise ValueError("Korean tokenizer requires konlpy. Please install it with 'pip install konlpy'.") from e
        kkma = Kkma()
        return kkma.sentences(text)
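Here Kkma is used purely as a sentence splitter, and the import is deferred so konlpy stays an optional dependency. A minimal sketch of the underlying call:

from konlpy.tag import Kkma

kkma = Kkma()
print(kkma.sentences('날씨가 좋습니다. 산책을 갑시다.'))
# e.g. ['날씨가 좋습니다.', '산책을 갑시다.']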
github ratsgo / embedding / preprocess / supervised_nlputils.py
from konlpy.tag import Hannanum, Kkma, Komoran, Mecab, Okt
from khaiii import KhaiiiApi

def get_tokenizer(tokenizer_name):
    if tokenizer_name == "komoran":
        tokenizer = Komoran()
    elif tokenizer_name == "okt":
        tokenizer = Okt()
    elif tokenizer_name == "mecab":
        tokenizer = Mecab()
    elif tokenizer_name == "hannanum":
        tokenizer = Hannanum()
    elif tokenizer_name == "kkma":
        tokenizer = Kkma()
    elif tokenizer_name == "khaiii":
        tokenizer = KhaiiiApi()
    else:
        tokenizer = Mecab()
    return tokenizer
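All of the konlpy taggers returned here (Komoran, Okt, Mecab, Hannanum, Kkma) share the same interface, so callers can tokenize uniformly with .morphs() or .pos(); KhaiiiApi is the odd one out and needs its own analyze()-based handling. A short usage sketch:

tokenizer = get_tokenizer('kkma')
print(tokenizer.morphs('형태소 분석기를 고른다'))  # identical call for any konlpy tagger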
github bbayoung / korean-text-complexity / analyzer.py
from konlpy.tag import Kkma

def analyzer(message):
    # rule_database is a module-level mapping from POS tags
    # (or tag prefixes ending in '_') to scoring rules, defined elsewhere.
    kkma = Kkma()

    sentences = kkma.sentences(message)

    max_score = 0
    total_score = 0
    total_line = 0
    for sentence in sentences:
        morphemes = kkma.pos(sentence)
        print('Sentence : {}'.format(sentence))
        print('Morphemes : {}'.format(morphemes))
        labels = []
        score = 0
        for idx, morpheme in enumerate(morphemes):
            if morpheme[1] in rule_database or '{}_'.format(morpheme[1][:-1]) in rule_database:
                if morpheme[1] in rule_database:
                    current_rule = rule_database[morpheme[1]]
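The rule lookup above tries each morpheme's exact POS tag first, then a wildcard formed by replacing the tag's last character with '_'. A hedged illustration with hypothetical rule values and illustrative Kkma-style pairs:

rule_database = {'NNG': 1, 'EF_': 2}  # hypothetical: exact 'NNG' rule plus an 'EF*' wildcard

for morpheme, tag in [('학생', 'NNG'), ('습니다', 'EFN')]:
    if tag in rule_database:
        print(morpheme, '-> exact rule', rule_database[tag])
    elif '{}_'.format(tag[:-1]) in rule_database:
        print(morpheme, '-> wildcard rule', rule_database['{}_'.format(tag[:-1])])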