# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-06 20:23
import hanlp
from hanlp.common.component import KerasComponent
# Load a pretrained RNN part-of-speech tagger trained on CTB 5
tagger: KerasComponent = hanlp.load(hanlp.pretrained.pos.CTB5_POS_RNN)
# Tag a single pre-tokenized sentence (a list of tokens)
print(tagger('商品 和 服务'.split()))
# Serve the tagger as a prediction service
tagger.serve()
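# Hedged sketch (not in the original demo): taggers also accept a batch of
# pre-tokenized sentences, mirroring the batched NER call shown further below.
print(tagger([['商品', '和', '服务'], ['晓美焰', '来到', '北京']]))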
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-04 21:05
import hanlp
# A rule-based English tokenizer plus pretrained tagger and parsers for the pipeline
tokenizer = hanlp.utils.rules.tokenize_english
tagger = hanlp.load(hanlp.pretrained.pos.PTB_POS_RNN_FASTTEXT_EN)
syntactic_parser = hanlp.load(hanlp.pretrained.dep.PTB_BIAFFINE_DEP_EN)
semantic_parser = hanlp.load(hanlp.pretrained.sdp.SEMEVAL15_PAS_BIAFFINE_EN)
# Chain the stages: sentence split -> tokenize -> POS tag -> syntactic and semantic parsing
pipeline = hanlp.pipeline() \
    .append(hanlp.utils.rules.split_sentence, output_key='sentences') \
    .append(tokenizer, output_key='tokens') \
    .append(tagger, output_key='part_of_speech_tags') \
    .append(syntactic_parser, input_key=('tokens', 'part_of_speech_tags'),
            output_key='syntactic_dependencies', conll=False) \
    .append(semantic_parser, input_key=('tokens', 'part_of_speech_tags'),
            output_key='semantic_dependencies', conll=False)
print(pipeline)
text = '''Jobs and Wozniak co-founded Apple in 1976 to sell Wozniak's Apple I personal computer.
Together the duo gained fame and wealth a year later with the Apple II.
'''
doc = pipeline(text)
print(doc)
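# Hedged sketch: the pipeline's result reads like a dict keyed by each stage's
# output_key, so individual annotation layers can be pulled out directly.
print(doc['tokens'])
print(doc['part_of_speech_tags'])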
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-10 21:27
import hanlp
# Load a biaffine dependency parser trained on CTB 7 and score it on the CTB 7 test split
syntactic_parser = hanlp.load(hanlp.pretrained.dep.CTB7_BIAFFINE_DEP_ZH)
syntactic_parser.evaluate(hanlp.datasets.parsing.ctb.CTB7_DEP_TEST)
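# Hedged note: evaluate() scores the parser on the given test set (for
# dependency parsing, the usual metrics are UAS/LAS). A custom CoNLL test file
# could be scored the same way; the path below is hypothetical:
# syntactic_parser.evaluate('data/my_treebank/test.conllx')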
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-30 19:52
import hanlp
# A BERT-based Chinese NER model trained on MSRA; sentences are passed as character lists
recognizer = hanlp.load(hanlp.pretrained.ner.MSRA_NER_BERT_BASE_ZH)
print(recognizer.predict([list('上海华安工业(集团)公司董事长谭旭光和秘书张晚霞来到美国纽约现代艺术博物馆参观。'),
                          list('萨哈夫说,伊拉克将同联合国销毁伊拉克大规模杀伤性武器特别委员会继续保持合作。')]))
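# Hedged note on the output shape: each sentence yields a list of
# (entity, type, begin, end) tuples, where begin/end are character offsets,
# e.g. ('谭旭光', 'NR', 15, 18).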
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-28 21:25
import hanlp
# Parse a pre-tagged Chinese sentence: each token is a (word, POS) pair
syntactic_parser = hanlp.load(hanlp.pretrained.dep.CTB7_BIAFFINE_DEP_ZH)
sent = [('蜡烛', 'NN'), ('两', 'CD'), ('头', 'NN'), ('烧', 'VV')]
tree = syntactic_parser(sent)
print(tree)
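# Hedged sketch: the parse is a CoNLL-style sentence; each word carries
# CoNLL-X fields such as id, form, head and deprel (attribute names assumed
# from HanLP's CoNLL output convention).
for word in tree:
    print(word.id, word.form, word.head, word.deprel)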
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-03 22:50
import hanlp
# An English NER model trained on CoNLL 2003; input is already tokenized,
# so the sentence-final period is its own token
recognizer = hanlp.load(hanlp.pretrained.ner.CONLL03_NER_BERT_BASE_UNCASED_EN)
print(recognizer(["President", "Obama", "is", "speaking", "at", "the", "White", "House", "."]))
# Imports assumed for the snippets below (module paths follow the HanLP 2.x layout)
import os
import hanlp
from hanlp import pretrained
from hanlp.common.component import Component
from hanlp.utils.io_util import get_resource, load_json
from hanlp.utils.reflection import object_from_class_path


def load(save_dir: str, meta_filename='meta.json', transform_only=False, load_kwargs=None,
         **kwargs) -> hanlp.common.component.Component:
    """
    Load a saved component from an identifier.

    :param save_dir: The identifier of, or path to, the saved component.
    :param meta_filename: The meta file of that saved component, which stores the class_path and version.
    :param transform_only: Whether to load the transform only.
    :param load_kwargs: The arguments passed to the component's `load` method.
    :param kwargs: Additional arguments passed to the `from_meta` method.
    :return: A pretrained component.
    """
    # Resolve registry keys (e.g. 'CTB5_POS_RNN') to their URLs; other values pass through
    save_dir = hanlp.pretrained.ALL.get(save_dir, save_dir)
    from hanlp.utils.component_util import load_from_meta_file
    return load_from_meta_file(save_dir, meta_filename, transform_only=transform_only, load_kwargs=load_kwargs,
                               **kwargs)
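# Hedged usage sketch: because of the pretrained.ALL lookup above, both a
# registry key and a local path resolve to the same loading logic.
tagger = hanlp.load('CTB5_POS_RNN')  # registry key, fetched on demand
# component = hanlp.load('/path/to/saved/component')  # hypothetical local path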
def load_from_meta_file(save_dir: str, meta_filename='meta.json', transform_only=False, load_kwargs=None,
                        **kwargs) -> Component:
    identifier = save_dir
    load_path = save_dir
    # Resolve the identifier to a local resource (downloading it if necessary)
    save_dir = get_resource(save_dir)
    if save_dir.endswith('.json'):
        meta_filename = os.path.basename(save_dir)
        save_dir = os.path.dirname(save_dir)
    metapath = os.path.join(save_dir, meta_filename)
    if not os.path.isfile(metapath):
        tips = ''
        if save_dir.isupper():
            from difflib import SequenceMatcher
            # Rank registry keys by similarity to the (probably misspelled) identifier
            similar_keys = sorted(pretrained.ALL.keys(),
                                  key=lambda k: SequenceMatcher(None, save_dir, k).ratio(),
                                  reverse=True)[:5]
            tips = f'Check its spelling based on the available keys:\n' + \
                   f'{sorted(pretrained.ALL.keys())}\n' + \
                   f'Tips: it might be one of {similar_keys}'
        raise FileNotFoundError(f'The identifier {save_dir} resolves to a non-existent meta file {metapath}. {tips}')
    meta: dict = load_json(metapath)
    cls = meta.get('class_path', None)
    assert cls, f'{meta_filename} doesn\'t contain a class_path field'
    try:
        # Instantiate the component class named in meta.json, then restore its state
        obj: Component = object_from_class_path(cls, **kwargs)
        if hasattr(obj, 'load'):
            if transform_only:
                # noinspection PyUnresolvedReferences
                obj.load_transform(save_dir)
            else:
                if load_kwargs is None:
                    load_kwargs = {}
                if os.path.isfile(os.path.join(save_dir, 'config.json')):
                    obj.load(save_dir, **load_kwargs)
        return obj
    except Exception:
        # The original snippet is truncated here; re-raising is a minimal, hedged
        # completion so the function parses (the full source reports richer errors).
        raise
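# A minimal sketch of how a dotted class_path from meta.json could be resolved;
# the real object_from_class_path lives in hanlp.utils.reflection, and this
# hypothetical re-implementation is for illustration only.
import importlib

def object_from_class_path_sketch(class_path: str, **kwargs):
    module_name, class_name = class_path.rsplit('.', 1)  # split 'pkg.mod.Cls'
    module = importlib.import_module(module_name)        # import the defining module
    return getattr(module, class_name)(**kwargs)         # instantiate the component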