How to use the pyknp.Juman class in pyknp

To help you get started, we’ve selected a few pyknp examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Kensuke-Mitsuzawa / JapaneseTokenizers / test / test_juman_wrapper_python2.py View on Github external
def test_juman_wrapper(self):
        """Smoke-test pyknp's Juman analyzer on a short Japanese sentence.

        Analyzes a fixed sentence, logs the surface forms, and checks that
        every element of ``mrph_list()`` is a ``Morpheme`` while logging its
        attributes. When pyknp cannot be imported, the test only logs a skip
        message and returns.
        """
        try:
            # Morpheme is imported together with Juman so the isinstance check
            # below does not rely on a module-level ``pyknp`` name that this
            # guarded import never binds.
            from pyknp import Juman, Morpheme
        except ImportError:
            # Only the import is guarded: an ImportError raised later, during
            # analysis, must surface as a failure rather than a silent skip.
            logger.debug('skip test_juman_wrapper')
            return

        juman = Juman()
        result = juman.analysis(u"これはペンです。")
        logger.debug(','.join(mrph.midasi for mrph in result))

        for mrph in result.mrph_list():
            assert isinstance(mrph, Morpheme)
            # Lazy %-style arguments: the message is only formatted when the
            # debug level is actually enabled.
            logger.debug(
                u"見出し:%s, 読み:%s, 原形:%s, 品詞:%s, 品詞細分類:%s, 活用型:%s, 活用形:%s, 意味情報:%s, 代表表記:%s",
                mrph.midasi, mrph.yomi, mrph.genkei, mrph.hinsi, mrph.bunrui,
                mrph.katuyou1, mrph.katuyou2, mrph.imis, mrph.repname)
github ku-nlp / pyknp / pyknp / juman / simple.py View on Github external
#-*- encoding: utf-8 -*-

from __future__ import absolute_import
from pyknp import Juman
import unittest

# Module-level Juman instance, created once at import time and used by juman().
JUMAN = Juman()


def juman(input_str):
    """Run the module-level ``JUMAN`` analyzer on *input_str* and return its result."""
    analysis_result = JUMAN.analysis(input_str)
    return analysis_result


class SimpleTest(unittest.TestCase):
    """Checks the analysis of one fixed Japanese sentence via ``juman()``."""

    def test(self):
        """The sentence yields 7 morphemes that re-join to the original text."""
        sentence = u"この文を解析してください。"
        mlist = juman(sentence)

        # Number of morphemes in the analysis result.
        morpheme_count = len(mlist)
        self.assertEqual(morpheme_count, 7)

        # Concatenating the surface forms must reproduce the input exactly.
        surface_text = ''.join(morpheme.midasi for morpheme in mlist)
        self.assertEqual(surface_text, sentence)

        # The spec() text must contain at least 7 newline-separated parts.
        spec_lines = mlist.spec().split("\n")
        self.assertGreaterEqual(len(spec_lines), 7)

if __name__ == '__main__':
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / jumanpp_wrapper / jumanpp_wrapper.py View on Github external
def call_juman_interface(self, input_str):
        # type: (text_type) -> MList
        """* What you can do
        - You call Juman tokenizer interface.

        * Output
        - pyknp.MList
        """
        if isinstance(self.jumanpp_obj, Juman):
            ml_token_object = self.jumanpp_obj.analysis(input_str=input_str)
        elif isinstance(self.jumanpp_obj, JumanppHnadler):
            try:
                result_token = self.jumanpp_obj.query(input_string=input_str)
            except ProcessDownException:
                """Unix process is down by any reason."""
                logger.warning("Re-starting unix process because it takes longer time than {} seconds...".format(self.jumanpp_obj.timeout_second))
                self.jumanpp_obj.restart_process()
                self.jumanpp_obj.query(self.dummy_text)
                result_token = self.jumanpp_obj.query(input_string=input_str)
                ml_token_object = MList(result_token)
            except UnicodeDecodeError:
                logger.warning(msg="Process is down by some reason. It restarts process automatically.")
                self.jumanpp_obj.restart_process()
                self.jumanpp_obj.query(self.dummy_text)
                result_token = self.jumanpp_obj.query(input_string=input_str)
github axinc-ai / ailia-models / bert / bert.py View on Github external
def text2token(text, tokenizer, lang='en'):
    # convert a text to tokens which can be interpreted in BERT model
    if lang == 'en':
        text = text.replace('_', '[MASK]')
        masked_text = "[CLS] " + text + " [SEP]"
        tokenized_text = tokenizer.tokenize(masked_text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

    elif lang == 'jp':
        jumanapp = Juman()
        juman_res = jumanapp.analysis(text)
        tokenized_text = [mrph.midasi for mrph in juman_res.mrph_list()]
        tokenized_text.insert(0, '[CLS]')
        tokenized_text.append('[SEP]')
        tokenized_text = [
            '[MASK]' if token == '_' else token for token in tokenized_text
        ]
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

    masked_index = tokenized_text.index('[MASK]')
    segments_ids = [0] * len(tokenized_text)
    tokens_ts = np.array([indexed_tokens])
    segments_ts = np.array([segments_ids])

    # input length fixed by max_seq_len
    # (ailia should manage adoptable input size)
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / juman_wrapper / juman_wrapper.py View on Github external
def call_juman_interface(self, input_str):
        # type: (text_type)->MList
        """Analyze *input_str* with whichever backend ``self.juman`` holds.

        - ``pyknp.Juman``: the result of ``analysis()`` is returned directly.
        - ``JumanppHnadler``: the output of ``query()`` is wrapped in ``MList``.
          On ``UnicodeDecodeError`` a warning is logged, the process is
          restarted, and the query is issued once more.

        Raises ``Exception('Not defined.')`` for any other backend type.
        """
        backend = self.juman

        if isinstance(backend, pyknp.Juman):
            return backend.analysis(input_str)

        if not isinstance(backend, JumanppHnadler):
            raise Exception('Not defined.')

        try:
            raw_output = backend.query(input_str)
        except UnicodeDecodeError:
            # Restart the process and retry the same input a single time.
            logger.warning(msg="Process is down by some reason. It restarts process automatically.")
            backend.restart_process()
            raw_output = backend.query(input_string=input_str)

        return MList(raw_output)
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / juman_wrapper / juman_wrapper.py View on Github external
pass
            self.is_use_pyknp = True
        else:
            pass

        if server is not None:
            # use server mode #
            self.juman = pyknp.Juman(command=command, server=server, port=port,
                                     timeout=self.timeout, rcfile=rcfile, option=option,
                                     pattern=pattern, jumanpp=False, **args)
            if six.PY3:
                # It overwrites juman_lines() method #
                self.juman.juman_lines = self.__monkey_patch_juman_lines
        elif is_use_pyknp and server is None:
            # use unix process with pyknp
            self.juman = pyknp.Juman(command=command, server=server, port=port,
                                     timeout=self.timeout, rcfile=rcfile, option=option,
                                     pattern=pattern, jumanpp=False, **args)
        else:
            # use unix process with pexpect(RECOMMENDED) #
            self.juman = JumanppHnadler(jumanpp_command=command,
                                        option=self.option,
                                        pattern=self.pattern,
                                        timeout_second=self.timeout)

pyknp

Python module for JUMAN/KNP

See COPYING
Latest version published 3 years ago

Package Health Score

52 / 100
Full package analysis

Similar packages