Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-02 19:41
import hanlp
# Sample input: one English sentence followed by a newline.
text = """\
Don't go gentle into that good night.
"""

# Tokenize with hanlp.utils.rules.tokenize_english and show the result.
english_tokens = hanlp.utils.rules.tokenize_english(text)
print(english_tokens)
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-31 03:24
import hanlp
# Load the four pretrained components by their identifiers.
tokenizer = hanlp.load('CTB6_CONVSEG')
tagger = hanlp.load('CTB5_POS_RNN_FASTTEXT_ZH')
syntactic_parser = hanlp.load('CTB7_BIAFFINE_DEP_ZH')
semantic_parser = hanlp.load('SEMEVAL16_TEXT_BIAFFINE_ZH')

# Both parsers are wired to the same pair of upstream keys.
parser_inputs = ('tokens', 'part_of_speech_tags')

# Chain the stages: sentence splitting, tokenization, tagging, then the two parsers.
pipeline = (
    hanlp.pipeline()
    .append(hanlp.utils.rules.split_sentence, output_key='sentences')
    .append(tokenizer, output_key='tokens')
    .append(tagger, output_key='part_of_speech_tags')
    .append(
        syntactic_parser,
        input_key=parser_inputs,
        output_key='syntactic_dependencies',
        conll=False,
    )
    .append(
        semantic_parser,
        input_key=parser_inputs,
        output_key='semantic_dependencies',
        conll=False,
    )
)
print(pipeline)

# Multi-line Chinese sample document (three lines, trailing newline included).
text = '''HanLP是一系列模型与算法组成的自然语言处理工具包,目标是普及自然语言处理在生产环境中的应用。
HanLP具备功能完善、性能高效、架构清晰、语料时新、可自定义的特点。
内部算法经过工业界和学术界考验,配套书籍《自然语言处理入门》已经出版。
'''

# Run the whole pipeline on the sample and print what it produced.
doc = pipeline(text)
print(doc)
# By default the doc is JSON serializable; this holds as long as your pipes output JSON-serializable objects too.
# print(json.dumps(doc, ensure_ascii=False, indent=2))
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-04 21:05
import hanlp
# The tokenizer here is a plain function; the other components are loaded
# from the pretrained identifiers exposed under hanlp.pretrained.
tokenizer = hanlp.utils.rules.tokenize_english
tagger = hanlp.load(hanlp.pretrained.pos.PTB_POS_RNN_FASTTEXT_EN)
syntactic_parser = hanlp.load(hanlp.pretrained.dep.PTB_BIAFFINE_DEP_EN)
semantic_parser = hanlp.load(hanlp.pretrained.sdp.SEMEVAL15_PAS_BIAFFINE_EN)

# Both parsers are wired to the same pair of upstream keys.
parser_inputs = ('tokens', 'part_of_speech_tags')

# Chain the stages: sentence splitting, tokenization, tagging, then the two parsers.
pipeline = (
    hanlp.pipeline()
    .append(hanlp.utils.rules.split_sentence, output_key='sentences')
    .append(tokenizer, output_key='tokens')
    .append(tagger, output_key='part_of_speech_tags')
    .append(
        syntactic_parser,
        input_key=parser_inputs,
        output_key='syntactic_dependencies',
        conll=False,
    )
    .append(
        semantic_parser,
        input_key=parser_inputs,
        output_key='semantic_dependencies',
        conll=False,
    )
)
print(pipeline)

# English sample document (two lines, trailing newline included).
# NOTE(review): within this excerpt `text` is only defined; it is not passed to `pipeline`.
text = '''Jobs and Wozniak co-founded Apple in 1976 to sell Wozniak's Apple I personal computer.
Together the duo gained fame and wealth a year later with the Apple II.
'''
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-04 21:05
import hanlp
# The tokenizer here is a plain function; the other components are loaded
# from the pretrained identifiers exposed under hanlp.pretrained.
tokenizer = hanlp.utils.rules.tokenize_english
tagger = hanlp.load(hanlp.pretrained.pos.PTB_POS_RNN_FASTTEXT_EN)
syntactic_parser = hanlp.load(hanlp.pretrained.dep.PTB_BIAFFINE_DEP_EN)
semantic_parser = hanlp.load(hanlp.pretrained.sdp.SEMEVAL15_PAS_BIAFFINE_EN)

# Both parsers are wired to the same pair of upstream keys.
parser_inputs = ('tokens', 'part_of_speech_tags')

# Chain the stages: sentence splitting, tokenization, tagging, then the two parsers.
pipeline = (
    hanlp.pipeline()
    .append(hanlp.utils.rules.split_sentence, output_key='sentences')
    .append(tokenizer, output_key='tokens')
    .append(tagger, output_key='part_of_speech_tags')
    .append(
        syntactic_parser,
        input_key=parser_inputs,
        output_key='syntactic_dependencies',
        conll=False,
    )
    .append(
        semantic_parser,
        input_key=parser_inputs,
        output_key='semantic_dependencies',
        conll=False,
    )
)
print(pipeline)

# English sample document (two lines, trailing newline included).
text = '''Jobs and Wozniak co-founded Apple in 1976 to sell Wozniak's Apple I personal computer.
Together the duo gained fame and wealth a year later with the Apple II.
'''

# Run the whole pipeline on the sample and print what it produced.
doc = pipeline(text)
print(doc)

# Persist the pipeline configuration to 'en.json' for deployment or sharing.
pipeline.save('en.json')
# The saved config can then be loaded back (the loading code is not included in this excerpt).
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-31 03:24
import hanlp
# Load the four pretrained components by their identifiers.
tokenizer = hanlp.load('PKU_NAME_MERGED_SIX_MONTHS_CONVSEG')
tagger = hanlp.load('CTB5_POS_RNN_FASTTEXT_ZH')
syntactic_parser = hanlp.load('CTB7_BIAFFINE_DEP_ZH')
semantic_parser = hanlp.load('SEMEVAL16_TEXT_BIAFFINE_ZH')

# Both parsers are wired to the same pair of upstream keys.
parser_inputs = ('tokens', 'part_of_speech_tags')

# Chain the stages: sentence splitting, tokenization, tagging, then the two parsers.
pipeline = (
    hanlp.pipeline()
    .append(hanlp.utils.rules.split_sentence, output_key='sentences')
    .append(tokenizer, output_key='tokens')
    .append(tagger, output_key='part_of_speech_tags')
    .append(
        syntactic_parser,
        input_key=parser_inputs,
        output_key='syntactic_dependencies',
    )
    .append(
        semantic_parser,
        input_key=parser_inputs,
        output_key='semantic_dependencies',
    )
)
print(pipeline)

# Multi-line Chinese sample document (three lines, trailing newline included).
text = '''HanLP是一系列模型与算法组成的自然语言处理工具包,目标是普及自然语言处理在生产环境中的应用。
HanLP具备功能完善、性能高效、架构清晰、语料时新、可自定义的特点。
内部算法经过工业界和学术界考验,配套书籍《自然语言处理入门》已经出版。
'''

# Run the whole pipeline on the sample and print what it produced.
doc = pipeline(text)
print(doc)

# Persist the pipeline configuration to 'zh.json' for deployment or sharing.
pipeline.save('zh.json')
def load(self, save_dir: str, logger=hanlp.utils.log_util.logger, **kwargs):
    """Restore this component's config, vocabs, weights and meta from ``save_dir``.

    The ``save_dir`` value exactly as passed in is recorded under
    ``self.meta['load_path']`` before it is handed to ``get_resource``; the
    value returned by ``get_resource`` is what every subsequent loader
    receives.

    Args:
        save_dir: Location of the saved component, in whatever form
            ``get_resource`` accepts.
        logger: Logger forwarded to ``self.build`` through the merged
            build arguments.
        **kwargs: Extra options. They are merged into the build arguments
            and also forwarded to ``self.load_weights``.
    """
    # Remember the caller-supplied location before it is replaced below.
    self.meta['load_path'] = save_dir
    save_dir = get_resource(save_dir)

    # Config and vocabs are loaded before build(), which is fed self.config.
    self.load_config(save_dir)
    self.load_vocabs(save_dir)

    # NOTE(review): inplace=True presumably lets merge_dict update self.config
    # itself -- confirm against merge_dict's implementation.
    build_kwargs = merge_dict(self.config, training=False, logger=logger, **kwargs, overwrite=True, inplace=True)
    self.build(**build_kwargs)

    self.load_weights(save_dir, **kwargs)
    self.load_meta(save_dir)