How to use the hanlp.pretrained module in hanlp

To help you get started, we’ve selected a few hanlp.pretrained examples based on popular ways the library is used in public projects.
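Before diving in: hanlp.pretrained is a registry of model identifiers grouped by task (pos, ner, dep, sdp, and so on), and hanlp.load() resolves those identifiers through the hanlp.pretrained.ALL dictionary before fetching the model. A minimal sketch, using only identifiers that appear in the examples below:

import hanlp

# The registry keys are the identifier names; list a few of them.
print(sorted(hanlp.pretrained.ALL.keys())[:5])

# Either the constant itself or its name as a plain string loads the model,
# since load() falls back through hanlp.pretrained.ALL.
tagger = hanlp.load(hanlp.pretrained.pos.CTB5_POS_RNN)
same_tagger = hanlp.load('CTB5_POS_RNN')
print(tagger(['商品', '和', '服务']))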

From hankcs/HanLP, tests/demo/zh/demo_serving.py:
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-06 20:23
import hanlp
from hanlp.common.component import KerasComponent

tagger: KerasComponent = hanlp.load(hanlp.pretrained.pos.CTB5_POS_RNN)
print(tagger('商品 和 服务'.split()))
tagger.serve()
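Before tagger.serve() exposes the component for remote prediction, the tagger can also be called locally with a batch of pre-tokenized sentences; a quick sketch (the second sentence is illustrative):

# Batch prediction: a list of token lists in, one tag sequence out per sentence.
print(tagger([['商品', '和', '服务'], ['我', '爱', '自然', '语言', '处理']]))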
From hankcs/HanLP, tests/demo/en/demo_pipeline.py:
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-04 21:05
import hanlp

tokenizer = hanlp.utils.rules.tokenize_english
tagger = hanlp.load(hanlp.pretrained.pos.PTB_POS_RNN_FASTTEXT_EN)
syntactic_parser = hanlp.load(hanlp.pretrained.dep.PTB_BIAFFINE_DEP_EN)
semantic_parser = hanlp.load(hanlp.pretrained.sdp.SEMEVAL15_PAS_BIAFFINE_EN)

pipeline = hanlp.pipeline() \
    .append(hanlp.utils.rules.split_sentence, output_key='sentences') \
    .append(tokenizer, output_key='tokens') \
    .append(tagger, output_key='part_of_speech_tags') \
    .append(syntactic_parser, input_key=('tokens', 'part_of_speech_tags'), output_key='syntactic_dependencies', conll=False) \
    .append(semantic_parser, input_key=('tokens', 'part_of_speech_tags'), output_key='semantic_dependencies', conll=False)
print(pipeline)

text = '''Jobs and Wozniak co-founded Apple in 1976 to sell Wozniak's Apple I personal computer.
Together the duo gained fame and wealth a year later with the Apple II.
'''

doc = pipeline(text)
print(doc)
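The printed doc is keyed by the output_key values declared above, so each stage's result can be read back individually; a small sketch, assuming the returned document behaves like a dict of those keys:

# Each pipe's output_key becomes a field of the resulting document.
print(doc['tokens'])
print(doc['part_of_speech_tags'])
print(doc['syntactic_dependencies'])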
From hankcs/HanLP, tests/script/evaluate_dep.py:
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-10 21:27
import hanlp

syntactic_parser = hanlp.load(hanlp.pretrained.dep.CTB7_BIAFFINE_DEP_ZH)
syntactic_parser.evaluate(hanlp.datasets.parsing.ctb.CTB7_DEP_TEST)
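evaluate() runs the parser over a labeled test set and reports the standard metrics (typically the UAS/LAS attachment scores). Since the bundled dataset constant is just a path, the same call should accept any CoNLL-formatted file; a sketch with a hypothetical local path:

# Hypothetical: evaluate on your own CoNLL-format test file
# instead of the bundled CTB7 split.
syntactic_parser.evaluate('data/my_test.conllx')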
From hankcs/HanLP, tests/demo/zh/demo_ner.py:
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-30 19:52
import hanlp

recognizer = hanlp.load(hanlp.pretrained.ner.MSRA_NER_BERT_BASE_ZH)
print(recognizer.predict([list('上海华安工业(集团)公司董事长谭旭光和秘书张晚霞来到美国纽约现代艺术博物馆参观。'),
                          list('萨哈夫说,伊拉克将同联合国销毁伊拉克大规模杀伤性武器特别委员会继续保持合作。')]))
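The recognizer consumes character lists (hence the list(...) calls above) and yields one entity list per sentence. A sketch of walking the result, assuming the usual (entity, type, begin, end) tuple layout:

for sentence in recognizer.predict([list('萨哈夫说,伊拉克将同联合国销毁伊拉克大规模杀伤性武器特别委员会继续保持合作。')]):
    for entity, etype, begin, end in sentence:  # tuple layout assumed
        print(entity, etype)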
From hankcs/HanLP, tests/demo/zh/demo_dep.py:
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-28 21:25
import hanlp

syntactic_parser = hanlp.load(hanlp.pretrained.dep.CTB7_BIAFFINE_DEP_ZH)
sent = [('蜡烛', 'NN'), ('两', 'CD'), ('头', 'NN'), ('烧', 'VV')]
tree = syntactic_parser(sent)
print(tree)
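The parser takes (token, pos-tag) pairs and returns a CoNLL-style tree; a sketch of reading each word's head and relation, with attribute names assumed from the CoNLL convention:

for word in tree:
    # id/form/head/deprel attribute names assumed from the CoNLL format
    print(word.id, word.form, word.head, word.deprel)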
From hankcs/HanLP, tests/demo/en/demo_ner.py:
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-03 22:50
import hanlp

recognizer = hanlp.load(hanlp.pretrained.ner.CONLL03_NER_BERT_BASE_UNCASED_EN)
print(recognizer(["President", "Obama", "is", "speaking", "at", "the", "White", "House", "."]))
From hankcs/HanLP, hanlp/__init__.py:
def load(save_dir: str, meta_filename='meta.json', transform_only=False, load_kwargs=None,
         **kwargs) -> hanlp.common.component.Component:
    """
    Load saved component from identifier.
    :param save_dir: The identifier to the saved component.
    :param meta_filename: The meta file of that saved component, which stores the class_path and version.
    :param transform_only: Whether to load transform only.
    :param load_kwargs: The arguments passed to `load`
    :param kwargs: Additional arguments passed to the `from_meta` method.
    :return: A pretrained component.
    """
    save_dir = hanlp.pretrained.ALL.get(save_dir, save_dir)
    from hanlp.utils.component_util import load_from_meta_file
    return load_from_meta_file(save_dir, meta_filename, transform_only=transform_only, load_kwargs=load_kwargs, **kwargs)
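Because of the ALL.get(save_dir, save_dir) fallback above, load() accepts either a registry key or a path that already points at a saved component; a sketch (the local path is hypothetical):

recognizer = hanlp.load('MSRA_NER_BERT_BASE_ZH')      # plain registry key
component = hanlp.load('/path/to/a/saved/component')  # hypothetical local save_dir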
From hankcs/HanLP, hanlp/utils/component_util.py:
def load_from_meta_file(save_dir: str, meta_filename='meta.json', transform_only=False, load_kwargs=None,
                        **kwargs) -> Component:
    identifier = save_dir
    load_path = save_dir
    save_dir = get_resource(save_dir)
    if save_dir.endswith('.json'):
        meta_filename = os.path.basename(save_dir)
        save_dir = os.path.dirname(save_dir)
    metapath = os.path.join(save_dir, meta_filename)
    if not os.path.isfile(metapath):
        tips = ''
        if save_dir.isupper():
            from difflib import SequenceMatcher
            similar_keys = sorted(pretrained.ALL.keys(),
                                  key=lambda k: SequenceMatcher(None, save_dir, k).ratio(),
                                  reverse=True)[:5]
            tips = f'Check its spelling based on the available keys:\n' + \
                   f'{sorted(pretrained.ALL.keys())}\n' + \
                   f'Tips: it might be one of {similar_keys}'
        raise FileNotFoundError(f'The identifier {save_dir} resolves to a non-existent meta file {metapath}. {tips}')
    meta: dict = load_json(metapath)
    cls = meta.get('class_path', None)
    assert cls, f'{meta_filename} doesn\'t contain class_path field'
    try:
        obj: Component = object_from_class_path(cls, **kwargs)
        if hasattr(obj, 'load'):
            if transform_only:
                # noinspection PyUnresolvedReferences
                obj.load_transform(save_dir)
            else:
                if load_kwargs is None:
                    load_kwargs = {}
                if os.path.isfile(os.path.join(save_dir, 'config.json')):
                    obj.load(save_dir, **load_kwargs)
        return obj  # assumed: the full source goes on to return the loaded component
    except Exception:
        raise  # error handling is truncated in this excerpt
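The isupper() branch above is what produces spelling suggestions when an identifier is mistyped; a sketch of what that looks like from the caller's side (the misspelled key is deliberate):

import hanlp

try:
    hanlp.load('CTB5_POS_RNNN')  # deliberate typo of CTB5_POS_RNN
except FileNotFoundError as e:
    print(e)  # message ends with 'Tips: it might be one of [...]'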