How to use the hanlp.common.constant.HANLP_URL constant in hanlp

To help you get started, we’ve selected a few hanlp examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hankcs / HanLP / hanlp / utils / io_util.py View on Github external
def path_from_url(url, save_dir=hanlp_home(), prefix=HANLP_URL, append_location=True):
    """
    Work out the local path under which a url is stored
    :param url: the url of the resource
    :param save_dir: root directory for local copies, a falsy value falls back to ``hanlp_home()``
    :param prefix: urls starting with this prefix keep their path relative to it, any other url
        is placed under ``thirdparty/<domain>`` (domain as returned by ``parse_url_path``)
    :param append_location: if True the relative path of the url is kept below ``save_dir``,
        otherwise only its base name is used
    :return: the local path for the url
    """
    save_dir = save_dir or hanlp_home()
    domain, relative_path = parse_url_path(url)

    if not append_location:
        # flat layout: the file sits directly inside save_dir
        return os.path.join(save_dir, os.path.basename(relative_path))

    if url.startswith(prefix):
        # drop the part of the path that is already contained in prefix
        middle = prefix.split(domain)[-1].lstrip('/')
        if relative_path.startswith(middle):
            relative_path = relative_path[len(middle):]
    else:
        save_dir = os.path.join(save_dir, 'thirdparty', domain)
    return os.path.join(save_dir, relative_path)
github hankcs / HanLP / hanlp / pretrained / sdp.py View on Github external
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-31 23:54
from hanlp.common.constant import HANLP_URL

# Zip archives below the sdp/ path of HANLP_URL: the SEMEVAL16 (_ZH) pair first,
# then the SEMEVAL15 (_EN) identifiers.
SEMEVAL16_NEWS_BIAFFINE_ZH = f'{HANLP_URL}sdp/semeval16-news-biaffine_20191231_235407.zip'
SEMEVAL16_TEXT_BIAFFINE_ZH = f'{HANLP_URL}sdp/semeval16-text-biaffine_20200101_002257.zip'

SEMEVAL15_PAS_BIAFFINE_EN = f'{HANLP_URL}sdp/semeval15_biaffine_pas_20200103_152405.zip'
SEMEVAL15_PSD_BIAFFINE_EN = f'{HANLP_URL}sdp/semeval15_biaffine_psd_20200106_123009.zip'
SEMEVAL15_DM_BIAFFINE_EN = f'{HANLP_URL}sdp/semeval15_biaffine_dm_20200106_122808.zip'

# Starts out empty; presumably populated elsewhere at runtime (confirm against the loader).
ALL = {}
github hankcs / HanLP / hanlp / utils / io_util.py View on Github external
def download(url, save_path=None, save_dir=hanlp_home(), prefix=HANLP_URL, append_location=True):
    if not save_path:
        save_path = path_from_url(url, save_dir, prefix, append_location)
    if os.path.isfile(save_path):
        eprint('Using local {}, ignore {}'.format(save_path, url))
        return save_path
    else:
        makedirs(parent_dir(save_path))
        eprint('Downloading {} to {}'.format(url, save_path))
        tmp_path = '{}.downloading'.format(save_path)
        remove_file(tmp_path)
        try:
            def reporthook(count, block_size, total_size):
                global start_time, progress_size
                if count == 0:
                    start_time = time.time()
                    progress_size = 0
github hankcs / HanLP / hanlp / datasets / parsing / ctb.py View on Github external
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-28 18:44
from hanlp.common.constant import HANLP_URL

# Common base for every identifier below. It ends with '#' so that a path can be
# appended after it (presumably a location inside the zip archive; confirm in get_resource).
CTB_HOME = f'{HANLP_URL}embeddings/SUDA-LA-CIP_20200109_021624.zip#'

# CTB5 files: train / dev / test in CoNLL format
CTB5_DEP_HOME = f'{CTB_HOME}BPNN/data/ctb5/'

CTB5_DEP_TRAIN = f'{CTB5_DEP_HOME}train.conll'
CTB5_DEP_VALID = f'{CTB5_DEP_HOME}dev.conll'
CTB5_DEP_TEST = f'{CTB5_DEP_HOME}test.conll'

# CTB7 files: same three splits
CTB7_HOME = f'{CTB_HOME}BPNN/data/ctb7/'

CTB7_DEP_TRAIN = f'{CTB7_HOME}train.conll'
CTB7_DEP_VALID = f'{CTB7_HOME}dev.conll'
CTB7_DEP_TEST = f'{CTB7_HOME}test.conll'

# Embedding text file that sits next to the ctb5/ and ctb7/ directories
CIP_W2V_100_CN = f'{CTB_HOME}BPNN/data/embed.txt'
github hankcs / HanLP / hanlp / pretrained / dep.py View on Github external
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-29 02:55
from hanlp.common.constant import HANLP_URL

# Zip archives below the dep/ path of HANLP_URL: two _ZH identifiers, then one _EN.
CTB5_BIAFFINE_DEP_ZH = f'{HANLP_URL}dep/biaffine_ctb5_20191229_025833.zip'
CTB7_BIAFFINE_DEP_ZH = f'{HANLP_URL}dep/biaffine_ctb7_20200109_022431.zip'

PTB_BIAFFINE_DEP_EN = f'{HANLP_URL}dep/ptb_dep_biaffine_20200101_174624.zip'

# Starts out empty; presumably populated elsewhere at runtime (confirm against the loader).
ALL = {}
github hankcs / HanLP / hanlp / pretrained / classifiers.py View on Github external
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2020-01-01 03:51
from hanlp.common.constant import HANLP_URL

# Zip archives below the classification/ path of HANLP_URL.
CHNSENTICORP_BERT_BASE_ZH = f'{HANLP_URL}classification/chnsenticorp_bert_base_20200104_164655.zip'
SST2_BERT_BASE_EN = f'{HANLP_URL}classification/sst2_bert_base_uncased_en_20200210_090240.zip'
SST2_ALBERT_BASE_EN = f'{HANLP_URL}classification/sst2_albert_base_20200122_205915.zip'
EMPATHETIC_DIALOGUES_SITUATION_ALBERT_BASE_EN = f'{HANLP_URL}classification/empathetic_dialogues_situation_albert_base_20200122_212250.zip'
EMPATHETIC_DIALOGUES_SITUATION_ALBERT_LARGE_EN = f'{HANLP_URL}classification/empathetic_dialogues_situation_albert_large_20200123_142724.zip'

# Starts out empty; presumably populated elsewhere at runtime (confirm against the loader).
ALL = {}
github hankcs / HanLP / hanlp / pretrained / word2vec.py View on Github external
from hanlp.common.constant import HANLP_URL

# One archive; the identifiers derived from it append '#<name>' to the archive url
# (presumably selecting a file inside the archive; confirm in get_resource).
CONVSEG_W2V_NEWS_TENSITE = f'{HANLP_URL}embeddings/convseg_embeddings.zip'
CONVSEG_W2V_NEWS_TENSITE_WORD_PKU = f'{CONVSEG_W2V_NEWS_TENSITE}#news_tensite.pku.words.w2v50'
CONVSEG_W2V_NEWS_TENSITE_WORD_MSR = f'{CONVSEG_W2V_NEWS_TENSITE}#news_tensite.msr.words.w2v50'
CONVSEG_W2V_NEWS_TENSITE_CHAR = f'{CONVSEG_W2V_NEWS_TENSITE}#news_tensite.w2v200'

# Same layout: a shared archive plus two '#<name>' variants
SEMEVAL16_EMBEDDINGS_CN = f'{HANLP_URL}embeddings/semeval16_embeddings.zip'
SEMEVAL16_EMBEDDINGS_300_NEWS_CN = f'{SEMEVAL16_EMBEDDINGS_CN}#news.fasttext.300.txt'
SEMEVAL16_EMBEDDINGS_300_TEXT_CN = f'{SEMEVAL16_EMBEDDINGS_CN}#text.fasttext.300.txt'

CTB5_FASTTEXT_300_CN = f'{HANLP_URL}embeddings/ctb.fasttext.300.txt.zip'

# The only url in this module that is not built from HANLP_URL
TENCENT_AI_LAB_EMBEDDING = 'https://ai.tencent.com/ailab/nlp/data/Tencent_AILab_ChineseEmbedding.tar.gz#Tencent_AILab_ChineseEmbedding.txt'

RADICAL_CHAR_EMBEDDING_100 = f'{HANLP_URL}embeddings/radical_char_vec_20191229_013849.zip#character.vec.txt'

# Starts out empty; presumably populated elsewhere at runtime (confirm against the loader).
ALL = {}
github hankcs / HanLP / hanlp / utils / io_util.py View on Github external
def get_resource(path: str, save_dir=None, extract=True, prefix=HANLP_URL, append_location=True):
    """
    Fetch real path for a resource (model, corpus, whatever)
    :param path: the general path (can be a url or a real path)
    :param extract: whether to unzip it if it's a zip file
    :param save_dir:
    :return: the real path to the resource
    """
    anchor: str = None
    compressed = None
    if os.path.isdir(path):
        return path
    elif os.path.isfile(path):
        pass
    elif path.startswith('http:') or path.startswith('https:'):
        url = path
        if '#' in url:
github hankcs / HanLP / hanlp / pretrained / ner.py View on Github external
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-30 20:07
from hanlp.common.constant import HANLP_URL

# Zip archives below the ner/ path of HANLP_URL: two _ZH identifiers, then one _EN.
MSRA_NER_BERT_BASE_ZH = f'{HANLP_URL}ner/ner_bert_base_msra_20200104_185735.zip'
MSRA_NER_ALBERT_BASE_ZH = f'{HANLP_URL}ner/ner_albert_base_zh_msra_20200111_202919.zip'
CONLL03_NER_BERT_BASE_UNCASED_EN = f'{HANLP_URL}ner/ner_conll03_bert_base_uncased_en_20200104_194352.zip'

# Starts out empty; presumably populated elsewhere at runtime (confirm against the loader).
ALL = {}
github hankcs / HanLP / hanlp / pretrained / cws.py View on Github external
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-28 21:12
from hanlp.common.constant import HANLP_URL

# Zip archives below the cws/ path of HANLP_URL. The commented-out lines are
# identifiers that are currently disabled; they are kept for reference.
SIGHAN2005_PKU_CONVSEG = f'{HANLP_URL}cws/sighan2005-pku-convseg_20200110_153722.zip'
SIGHAN2005_MSR_CONVSEG = f'{HANLP_URL}cws/convseg-msr-nocrf-noembed_20200110_153524.zip'
# SIGHAN2005_MSR_BERT_BASE = HANLP_URL + 'cws/cws_bert_base_msra_20191230_194627.zip'
CTB6_CONVSEG = f'{HANLP_URL}cws/ctb6_convseg_nowe_nocrf_20200110_004046.zip'
# CTB6_BERT_BASE = HANLP_URL + 'cws/cws_bert_base_ctb6_20191230_185536.zip'
PKU_NAME_MERGED_SIX_MONTHS_CONVSEG = f'{HANLP_URL}cws/pku98_6m_conv_ngram_20200110_134736.zip'

# Empty at import time, filled up during runtime
ALL = {}