How to use the sudachipy.dictionarylib.grammar.Grammar class in SudachiPy

To help you get started, we’ve selected a few SudachiPy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github WorksApplications / SudachiPy / tests / mock_grammar.py View on Github external
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from unittest import mock

from sudachipy.dictionarylib.charactercategory import CharacterCategory
from sudachipy.dictionarylib.grammar import Grammar

# Stand-in for Grammar (restricted to its interface via ``spec``) whose
# accessors return fixed values.
mocked_grammar = mock.Mock(spec=Grammar)
mocked_grammar.configure_mock(**{
    'get_part_of_speech_size.return_value': 0,
    'get_part_of_speech_string.return_value': None,
    'get_part_of_speech_id.return_value': 0,
    'get_connect_cost.return_value': 0,
    # set_connect_cost is not configured and keeps the Mock default.
    'get_bos_parameter.return_value': None,
    'get_eos_parameter.return_value': None,
})


def mocked_get_character_category():
    cat = CharacterCategory()
    test_resources_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        os.pardir,
        'sudachipy',
        'resources')
github WorksApplications / SudachiPy / tests / dictionarylib / test_dictionarybuilder.py View on Github external
import mmap
        from sudachipy import dictionarylib
        buffers = []
        if filename is None:
            raise AttributeError("system dictionary is not specified")
        with open(filename, 'r+b') as system_dic:
            bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        buffers.append(bytes_)

        offset = 0
        header = dictionarylib.dictionaryheader.DictionaryHeader.from_bytes(bytes_, offset)
        if header.version != SYSTEM_DICT_VERSION:
            raise Exception("invalid system dictionary")
        offset += header.storage_size()

        grammar = dictionarylib.grammar.Grammar(bytes_, offset)
        offset += grammar.get_storage_size()

        lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
        return buffers, header, grammar, lexicon
github megagonlabs / ginza / sudachipy / dictionary.py View on Github external
def read_system_dictionary(self, filename):
        """Memory-map the system dictionary and load its header, grammar and lexicon.

        The mapping is appended to ``self.buffers``; the parsed parts are
        stored on ``self.header``, ``self.grammar`` and ``self.lexicon``.

        Args:
            filename: path of the system dictionary file

        Raises:
            AttributeError: if ``filename`` is None
            Exception: if the header version is not the system dictionary version

        """
        if filename is None:
            raise AttributeError("system dictionary is not specified")
        # The mapping is read-only, so a read-only descriptor is enough;
        # opening with 'r+b' would demand write permission that is never used
        # and fails for dictionaries installed read-only.
        with open(filename, 'rb') as system_dic:
            bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        # The mapping is still used below, after the file object is closed.
        self.buffers.append(bytes_)

        # The sections are laid out back to back: header, grammar, lexicon.
        offset = 0
        self.header = dictionarylib.dictionaryheader.DictionaryHeader.from_bytes(bytes_, offset)
        if self.header.version != DictionaryVersion.SYSTEM_DICT_VERSION:
            raise Exception("invalid system dictionary")
        offset += self.header.storage_size()

        self.grammar = dictionarylib.grammar.Grammar(bytes_, offset)
        offset += self.grammar.get_storage_size()

        self.lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
github WorksApplications / SudachiPy / sudachipy / lattice.py View on Github external
def connect_node(self, r_node: LatticeNode) -> None:
        """Link ``r_node`` to the cheapest predecessor ending where it begins.

        Candidates are the nodes in ``self.end_lists[r_node.begin]``. Nodes
        not connected to BOS and connections whose cost equals
        ``Grammar.INHIBITED_CONNECTION`` are ignored. The cheapest remaining
        candidate is stored in ``r_node.best_previous_node``, and the node's
        own cost is then added to ``r_node.total_cost``.

        Args:
            r_node: the right-hand node to connect

        """
        start = r_node.begin
        r_node.total_cost = float('inf')
        for candidate in self.end_lists[start]:
            if candidate.is_connected_to_bos:
                # The left node's right_id is paired with the right node's
                # left_id; the order looks reversed but is what works.
                edge_cost = self.grammar.get_connect_cost(candidate.right_id, r_node.left_id)
                if edge_cost != Grammar.INHIBITED_CONNECTION:
                    path_cost = candidate.total_cost + edge_cost
                    if path_cost < r_node.total_cost:
                        r_node.total_cost = path_cost
                        r_node.best_previous_node = candidate

        r_node.is_connected_to_bos = r_node.best_previous_node is not None
        r_node.total_cost += r_node.cost
github megagonlabs / ginza / sudachipy / command_line.py View on Github external
"""
    import mmap
    buffers = []
    if filename is None:
        raise AttributeError("system dictionary is not specified")
    with open(filename, 'r+b') as system_dic:
        bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
    buffers.append(bytes_)

    offset = 0
    header = DictionaryHeader.from_bytes(bytes_, offset)
    if header.version != DictionaryVersion.SYSTEM_DICT_VERSION:
        raise Exception("invalid system dictionary")
    offset += header.storage_size()

    grammar = dictionarylib.grammar.Grammar(bytes_, offset)
    offset += grammar.get_storage_size()

    lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
    return buffers, header, grammar, lexicon
github WorksApplications / SudachiPy / sudachipy / plugin / connect_cost / editconnectioncost.py View on Github external
def inhibit_connection(grammar: Grammar, left: int, right: int) -> None:
        """ Mark the connection between two IDs as inhibited.

        The connect cost for the pair is set to ``Grammar.INHIBITED_CONNECTION``.

        Args:
            grammar: grammar of system dictionary
            left: right-ID of left node
            right: left-ID of right node

        """
        inhibited_cost = Grammar.INHIBITED_CONNECTION
        grammar.set_connect_cost(left, right, inhibited_cost)
github WorksApplications / SudachiPy / sudachipy / dictionarylib / binarydictionary.py View on Github external
def _read_dictionary(filename, access=mmap.ACCESS_READ):
        """Memory-map a dictionary file and parse its header, grammar and lexicon.

        Args:
            filename: path of the dictionary file
            access: mmap access mode used for the mapping

        Returns:
            A tuple ``(bytes_, grammar, header, lexicon)`` where ``bytes_`` is
            the mapping itself. ``grammar`` is None when the header version is
            ``USER_DICT_VERSION_1``.

        Raises:
            Exception: if the header version is not one of the known
                system/user dictionary versions

        """
        # A read-only mapping needs only a read-only descriptor, so do not
        # demand write permission in that case. Every other access mode keeps
        # the read/write open.
        mode = 'rb' if access == mmap.ACCESS_READ else 'r+b'
        with open(filename, mode) as system_dic:
            bytes_ = mmap.mmap(system_dic.fileno(), 0, access=access)
        offset = 0
        header = DictionaryHeader.from_bytes(bytes_, offset)
        offset += header.storage_size()
        if header.version not in [SYSTEM_DICT_VERSION, USER_DICT_VERSION_1, USER_DICT_VERSION_2]:
            raise Exception('invalid dictionary version')
        # No grammar is read for USER_DICT_VERSION_1; the lexicon then starts
        # right after the header.
        grammar = None
        if header.version != USER_DICT_VERSION_1:
            grammar = Grammar(bytes_, offset)
            offset += grammar.get_storage_size()

        lexicon = DoubleArrayLexicon(bytes_, offset)
        return bytes_, grammar, header, lexicon