How to use pyknp - 10 common examples

To help you get started, we’ve selected a few pyknp examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Kensuke-Mitsuzawa / JapaneseTokenizers / test / test_juman_wrapper_python2.py View on Github external
def test_juman_wrapper(self):
        try:
            from pyknp import Juman

            juman = Juman()
            result = juman.analysis(u"これはペンです。")
            logger.debug(','.join(mrph.midasi for mrph in result))

            for mrph in result.mrph_list():
                assert isinstance(mrph, pyknp.Morpheme)
                logger.debug(u"見出し:%s, 読み:%s, 原形:%s, 品詞:%s, 品詞細分類:%s, 活用型:%s, 活用形:%s, 意味情報:%s, 代表表記:%s" \
                  % (mrph.midasi, mrph.yomi, mrph.genkei, mrph.hinsi, mrph.bunrui, mrph.katuyou1, mrph.katuyou2, mrph.imis, mrph.repname))
        except ImportError:
            logger.debug('skip test_juman_wrapper')
github Kensuke-Mitsuzawa / JapaneseTokenizers / test / test_juman_wrapper_python2.py View on Github external
def test_juman_wrapper(self):
        try:
            from pyknp import Juman

            juman = Juman()
            result = juman.analysis(u"これはペンです。")
            logger.debug(','.join(mrph.midasi for mrph in result))

            for mrph in result.mrph_list():
                assert isinstance(mrph, pyknp.Morpheme)
                logger.debug(u"見出し:%s, 読み:%s, 原形:%s, 品詞:%s, 品詞細分類:%s, 活用型:%s, 活用形:%s, 意味情報:%s, 代表表記:%s" \
                  % (mrph.midasi, mrph.yomi, mrph.genkei, mrph.hinsi, mrph.bunrui, mrph.katuyou1, mrph.katuyou2, mrph.imis, mrph.repname))
        except ImportError:
            logger.debug('skip test_juman_wrapper')
github ku-nlp / pyknp / pyknp / juman / simple.py View on Github external
#-*- encoding: utf-8 -*-

from __future__ import absolute_import
from pyknp import Juman
import unittest

JUMAN = Juman()


def juman(input_str):
    return JUMAN.analysis(input_str)


class SimpleTest(unittest.TestCase):

    def test(self):
        test_str = u"この文を解析してください。"
        result = juman(test_str)
        self.assertEqual(len(result), 7)
        self.assertEqual(''.join(mrph.midasi for mrph in result), test_str)
        self.assertGreaterEqual(len(result.spec().split("\n")), 7)

if __name__ == '__main__':
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / jumanpp_wrapper / __jumanpp_wrapper_python3.py View on Github external
- pyknp.MList
        """
        # type: (six.text_type)->MList
        if isinstance(self.jumanpp_obj, Jumanpp):
            ml_token_object = self.jumanpp_obj.analysis(input_str=input_str)
        elif isinstance(self.jumanpp_obj, JumanppHnadler):
            try:
                result_token = self.jumanpp_obj.query(input_string=input_str)
            except UnicodeDecodeError:
                logger.warning(msg="Process is down by some reason. It restarts process automatically.")
                self.jumanpp_obj.restart_process()
                result_token = self.jumanpp_obj.query(input_string=input_str)
            ml_token_object = MList(result_token)
        elif isinstance(self.jumanpp_obj, JumanppClient):
            server_response = self.jumanpp_obj.query(sentence=input_str, pattern=self.eos_pattern)
            ml_token_object = MList(server_response)
        else:
            raise Exception('Not defined')

        return ml_token_object
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / jumanpp_wrapper / __jumanpp_wrapper_python2.py View on Github external
* Output
        - pyknp.MList
        """
        if isinstance(self.jumanpp_obj, Jumanpp):
            ml_token_object = self.jumanpp_obj.analysis(input_str=input_str)
        elif isinstance(self.jumanpp_obj, JumanppHnadler):
            try:
                result_token = self.jumanpp_obj.query(input_string=input_str)
            except UnicodeDecodeError:
                logger.warning(msg="Process is down by some reason. It restarts process automatically.")
                self.jumanpp_obj.restart_process()
                result_token = self.jumanpp_obj.query(input_string=input_str)
            ml_token_object = MList(result_token)
        elif isinstance(self.jumanpp_obj, JumanppClient):
            server_response = self.jumanpp_obj.query(sentence=input_str, pattern=self.eos_pattern)
            ml_token_object = MList(server_response)
        else:
            raise Exception('Not defined')

        return ml_token_object
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / jumanpp_wrapper / jumanpp_wrapper.py View on Github external
logger.warning("Re-starting unix process because it takes longer time than {} seconds...".format(self.jumanpp_obj.timeout_second))
                self.jumanpp_obj.restart_process()
                self.jumanpp_obj.query(self.dummy_text)
                result_token = self.jumanpp_obj.query(input_string=input_str)
                ml_token_object = MList(result_token)
            except UnicodeDecodeError:
                logger.warning(msg="Process is down by some reason. It restarts process automatically.")
                self.jumanpp_obj.restart_process()
                self.jumanpp_obj.query(self.dummy_text)
                result_token = self.jumanpp_obj.query(input_string=input_str)
                ml_token_object = MList(result_token)
            else:
                ml_token_object = MList(result_token)
        elif isinstance(self.jumanpp_obj, JumanppClient):
            server_response = self.jumanpp_obj.query(sentence=input_str, pattern=self.eos_pattern)
            ml_token_object = MList(server_response)
        else:
            raise Exception('Not defined')

        return ml_token_object
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / jumanpp_wrapper / jumanpp_wrapper.py View on Github external
elif isinstance(self.jumanpp_obj, JumanppHnadler):
            try:
                result_token = self.jumanpp_obj.query(input_string=input_str)
            except ProcessDownException:
                """Unix process is down by any reason."""
                logger.warning("Re-starting unix process because it takes longer time than {} seconds...".format(self.jumanpp_obj.timeout_second))
                self.jumanpp_obj.restart_process()
                self.jumanpp_obj.query(self.dummy_text)
                result_token = self.jumanpp_obj.query(input_string=input_str)
                ml_token_object = MList(result_token)
            except UnicodeDecodeError:
                logger.warning(msg="Process is down by some reason. It restarts process automatically.")
                self.jumanpp_obj.restart_process()
                self.jumanpp_obj.query(self.dummy_text)
                result_token = self.jumanpp_obj.query(input_string=input_str)
                ml_token_object = MList(result_token)
            else:
                ml_token_object = MList(result_token)
        elif isinstance(self.jumanpp_obj, JumanppClient):
            server_response = self.jumanpp_obj.query(sentence=input_str, pattern=self.eos_pattern)
            ml_token_object = MList(server_response)
        else:
            raise Exception('Not defined')

        return ml_token_object
github ku-nlp / pyknp / pyknp / juman / jumanpp.py View on Github external
def jumanpp(self, input_str):
        assert(isinstance(input_str, six.text_type))
        result = MList(self.jumanpp_lines(input_str))
        return result
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / jumanpp_wrapper / jumanpp_wrapper.py View on Github external
result_token = self.jumanpp_obj.query(input_string=input_str)
            except ProcessDownException:
                """Unix process is down by any reason."""
                logger.warning("Re-starting unix process because it takes longer time than {} seconds...".format(self.jumanpp_obj.timeout_second))
                self.jumanpp_obj.restart_process()
                self.jumanpp_obj.query(self.dummy_text)
                result_token = self.jumanpp_obj.query(input_string=input_str)
                ml_token_object = MList(result_token)
            except UnicodeDecodeError:
                logger.warning(msg="Process is down by some reason. It restarts process automatically.")
                self.jumanpp_obj.restart_process()
                self.jumanpp_obj.query(self.dummy_text)
                result_token = self.jumanpp_obj.query(input_string=input_str)
                ml_token_object = MList(result_token)
            else:
                ml_token_object = MList(result_token)
        elif isinstance(self.jumanpp_obj, JumanppClient):
            server_response = self.jumanpp_obj.query(sentence=input_str, pattern=self.eos_pattern)
            ml_token_object = MList(server_response)
        else:
            raise Exception('Not defined')

        return ml_token_object
github Kensuke-Mitsuzawa / JapaneseTokenizers / JapaneseTokenizer / jumanpp_wrapper / __jumanpp_wrapper_python2.py View on Github external
"""* What you can do
        - You call Juman tokenizer interface.

        * Output
        - pyknp.MList
        """
        if isinstance(self.jumanpp_obj, Jumanpp):
            ml_token_object = self.jumanpp_obj.analysis(input_str=input_str)
        elif isinstance(self.jumanpp_obj, JumanppHnadler):
            try:
                result_token = self.jumanpp_obj.query(input_string=input_str)
            except UnicodeDecodeError:
                logger.warning(msg="Process is down by some reason. It restarts process automatically.")
                self.jumanpp_obj.restart_process()
                result_token = self.jumanpp_obj.query(input_string=input_str)
            ml_token_object = MList(result_token)
        elif isinstance(self.jumanpp_obj, JumanppClient):
            server_response = self.jumanpp_obj.query(sentence=input_str, pattern=self.eos_pattern)
            ml_token_object = MList(server_response)
        else:
            raise Exception('Not defined')

        return ml_token_object