How to use konlpy - 10 common examples

To help you get started, we’ve selected a few konlpy examples based on popular ways the library is used in public projects.
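
For orientation, here is roughly what a first session with the library looks like. This is a minimal sketch: the sample sentence is only illustrative, and konlpy's taggers need a Java runtime to start.

from konlpy.tag import Okt

okt = Okt()
text = u'konlpy는 한국어 형태소 분석을 위한 파이썬 패키지입니다.'

print(okt.morphs(text))   # split into morphemes
print(okt.nouns(text))    # keep only the nouns
print(okt.pos(text))      # (morpheme, POS tag) pairs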


From konlpy/konlpy: test/test_kkma.py (view on GitHub)
def kkma_instance():
    from konlpy.tag import Kkma
    k = Kkma()
    return k
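
In the test suite this fixture is handed to the individual tests, which call the tagger's analysis methods on it. A hedged sketch of that kind of call; the sentence is illustrative, not taken from the test file.

kkma = kkma_instance()
sentence = u'네, 안녕하세요. 반갑습니다.'

kkma.sentences(sentence)   # sentence segmentation
kkma.nouns(sentence)       # noun extraction
kkma.pos(sentence)         # (morpheme, tag) pairs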

From konlpy/konlpy: test/test_openkoreantext.py (view on GitHub)
def tkorean_instance():
    from konlpy.tag import Okt
    t = Okt()
    return t
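
Okt wraps the Open Korean Text analyzer (the class formerly exposed as Twitter). A sketch of its distinctive options, with an illustrative phrase:

okt = tkorean_instance()
phrase = u'이것도 되나욬ㅋㅋ'

okt.pos(phrase)                          # raw tagging
okt.pos(phrase, norm=True, stem=True)    # normalize colloquial spelling, stem verbs and adjectives
okt.phrases(phrase)                      # noun-phrase extraction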

From konlpy/konlpy: test/test_hannanum.py (view on GitHub)
def hannanum_instance():
    from konlpy import init_jvm
    from konlpy.tag import Hannanum
    init_jvm()
    h = Hannanum()
    return h
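
Here init_jvm() starts the JVM explicitly before the tagger is built; the taggers can also start it on their own when constructed. A sketch of typical Hannanum calls, with an illustrative sentence:

hannanum = hannanum_instance()
text = u'롯데마트의 흑마늘 양념 치킨이 논란이 되고 있다.'

hannanum.nouns(text)     # nouns
hannanum.morphs(text)    # morphemes
hannanum.analyze(text)   # full analysis, including alternative segmentations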

From konlpy/konlpy: test/test_komoran.py (view on GitHub)
def komoran_instance():
    from konlpy.tag import Komoran
    k = Komoran()
    return k
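
A sketch of typical Komoran calls with an illustrative sentence; the user-dictionary line is an assumption about konlpy's Komoran constructor, and the path is only a placeholder.

komoran = komoran_instance()
text = u'우왕 코모란도 오픈소스가 되었어요'

komoran.nouns(text)
komoran.pos(text)

# Komoran can also be built with a custom dictionary (placeholder path):
# komoran = Komoran(userdic='/path/to/user_dic.txt')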

From konlpy/konlpy: test/test_stream_daum.py (view on GitHub)
# DaumStreamer ships with konlpy's stream package (import path assumed from the repository layout).
from konlpy.stream import DaumStreamer

def test_daum_streamer():
    daum = DaumStreamer()
    daum.options.n_limits = 1
    daum.options.display_rank = True
    daum.options.verbose = True
    daum.options.interval = 3
    daum.stream()

From konlpy/konlpy: test/test_stream_naver.py (view on GitHub)
# NaverStreamer ships with konlpy's stream package (import path assumed from the repository layout).
from konlpy.stream import NaverStreamer

def test_naver_streamer():
    naver = NaverStreamer()
    naver.options.n_limits = 1
    naver.options.display_rank = True
    naver.options.verbose = True
    naver.options.interval = 3
    naver.stream()

From konlpy/konlpy: test/test_corpus.py (view on GitHub)
def test_corpus_kolaw():
    from konlpy.corpus import kolaw

    fids = kolaw.fileids()

    kolaw.abspath()
    kolaw.abspath(fids[0])

    assert kolaw.name == 'kolaw'
    # The first 10 characters decode to '대한민국헌법\n\n유구'.
    assert kolaw.open('constitution.txt').read(10) ==\
            u'\ub300\ud55c\ubbfc\uad6d\ud5cc\ubc95\n\n\uc720\uad6c'
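
Beyond these assertions, the corpus module pairs naturally with a tagger. A minimal sketch; kobill is konlpy's other bundled corpus, and the frequency count is only illustrative.

from collections import Counter
from konlpy.corpus import kolaw, kobill
from konlpy.tag import Okt

doc = kolaw.open('constitution.txt').read()           # full text of the Korean constitution
top_nouns = Counter(Okt().nouns(doc)).most_common(10)

kobill.fileids()                                      # National Assembly bill texts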

From TensorMSA/tensormsa: cluster/generator/ner_augmentation.py (view on GitHub)
def _conv_type_b(self, idx):
        """
        Read the pattern CSV and, for each (decode, encode) pair, tokenize the
        encode line (with Mecab or by whitespace), expand the matched patterns
        into augmented sentences and hand them to the intent formatter.
        (Assumes pandas as pd and konlpy.tag.Mecab are imported at module level.)
        """
        df_csv_read = pd.read_csv(self.pattern_data_path,
                                  skipinitialspace=True,
                                  engine="python",
                                  encoding='utf-8-sig')

        i = 0
        for key, line in zip(df_csv_read['decode'].values, df_csv_read['encode'].values):
            words = []
            if self.use_mecab:
                # A new Mecab instance is created for every line.
                self.mecab = Mecab('/usr/local/lib/mecab/dic/mecab-ko-dic')
                pos = self.mecab.pos(line)
                for word, tag in pos:
                    words.append(word)
            else:
                words = str(line).split(' ')
            match_keys = self._check_all_match(words)
            aug_data = self._aug_sent(match_keys, words, [])
            self._intent_formatter(aug_data, key, idx)

            if i % 100 == 0:
                print("====Thread{0} : {1} lines done".format(idx, i))
            i = i + 1
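
The augmentation code builds a Mecab tagger with an explicit dictionary path. A standalone sketch of that call; the path is the usual mecab-ko-dic install location, the sentence is illustrative, and konlpy's Mecab wrapper needs the native mecab-ko library (it is not available on Windows).

from konlpy.tag import Mecab

mecab = Mecab(dicpath='/usr/local/lib/mecab/dic/mecab-ko-dic')
for word, tag in mecab.pos(u'자연어 처리는 즐겁다'):
    print(word, tag)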

From konlpy/konlpy: konlpy/stream/twitter.py (view on GitHub)
# Excerpt from the streamer's tweet handler: append the incoming tweet to an
# output file and, in verbose mode, highlight the tracked words in the console.
filename = "{}{}{}.{}".format(
    self.dirname,
    self.options.output_prefix,
    word_count,
    self.options.output_extension
)

n_word_file = io.open(filename, 'a', encoding='utf-8')
n_word_file.write(tweet)
n_word_file.write("\n")

if self.options.verbose:
    for word in self.words:
        # Surround every occurrence of the word with CYAN ... RESET colour codes.
        tweet = (colorama.Fore.CYAN + word).join(tweet.split(word))
        tweet = (word + colorama.Fore.RESET).join(tweet.split(word))
    pprint(word_count, tweet)
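
The two split/join lines are a compact way to highlight tracked keywords in terminal output. The same idea as a standalone helper, using str.replace; the function name and sample text are illustrative.

import colorama

colorama.init()  # make ANSI colour codes work on Windows consoles

def highlight(text, keywords):
    # Wrap each keyword occurrence in cyan so it stands out in the terminal.
    for word in keywords:
        text = text.replace(word, colorama.Fore.CYAN + word + colorama.Fore.RESET)
    return text

print(highlight(u'konlpy로 수집한 트윗', [u'konlpy', u'트윗']))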

From MSWon/Sentimental-Analysis: Word2Vec/Word2Vec_train.py (view on GitHub)
import os
import codecs
import numpy as np
import tensorflow as tf
import gensim
from konlpy.tag import Twitter   # exposed as Okt in recent konlpy; Twitter is kept as a deprecated alias

os.chdir("C:\\Users\\jbk48\\Desktop\\Sentimental-Analysis-master\\Sentimental-Analysis-master\\Word2Vec\\Movie_rating_data")

def read_data(filename):    
    with open(filename, 'r',encoding='utf-8') as f:
        data = [line.split('\t') for line in f.read().splitlines()]        
        data = data[1:]   # skip the header row
    return data 
    
train_data = read_data('ratings_train.txt') 
test_data = read_data('ratings_test.txt') 

pos_tagger = Twitter() 

def tokenize(doc):

    return ['/'.join(t) for t in pos_tagger.pos(doc, norm=True, stem=True)]


## Train the Word2Vec model with skip-gram (sg=1); the parameter names
## (size, iter) follow the pre-gensim-4 API used by this script.
tokens = [tokenize(row[1]) for row in train_data]
model = gensim.models.Word2Vec(size=300, sg=1, alpha=0.025, min_alpha=0.025, seed=1234)
model.build_vocab(tokens)

for epoch in range(30):
    # Manually decay the learning rate between passes over the corpus.
    model.train(tokens, model.corpus_count, epochs=model.iter)
    model.alpha -= 0.002
    model.min_alpha = model.alpha
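
Once the loop finishes, the model can be queried and persisted. A hedged sketch that sticks to the pre-gensim-4 API used above; the filename is illustrative, and the query token follows the 'morpheme/TAG' format produced by tokenize().

model.save('word2vec_movie_ratings.model')

# Nearest neighbours of a tagged token, e.g. '영화/Noun' ("movie").
print(model.wv.most_similar(u'영화/Noun', topn=5))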