How to use the paddlehub.common.downloader.default_downloader.download_file_and_uncompress function in paddlehub

To help you get started, we’ve selected a few paddlehub examples based on popular ways the library is used in public projects.

Secure your code as it’s written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.

github PaddlePaddle / PaddleHub / paddlehub / dataset / base_cv_dataset.py View on Github external
def _download_dataset(self, dataset_path, url):
        """Fetch and uncompress the dataset archive unless it is already cached.

        Args:
            dataset_path: Expected local directory of the dataset.
            url: Remote archive location to download from.

        Returns:
            The local dataset path (possibly updated by the downloader).
        """
        # Cached copy already present — nothing to do.
        if os.path.exists(dataset_path):
            return dataset_path

        ok, message, dataset_path = default_downloader.download_file_and_uncompress(
            url=url,
            save_path=hub.common.dir.DATA_HOME,
            print_progress=True,
            replace=True)
        if not ok:
            # Surface the downloader's message and stop the process.
            print(message)
            exit()
        return dataset_path
github PaddlePaddle / PaddleHub / paddlehub / dataset / inews.py View on Github external
def __init__(self):
        """Load the INews dataset, downloading it to DATA_HOME on first use.

        Downloads and uncompresses the archive when the cache directory is
        missing, then loads the train, test and dev example splits.
        """
        self.dataset_dir = os.path.join(DATA_HOME, "inews")
        if not os.path.exists(self.dataset_dir):
            ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
                url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
            # Fix: the download result was previously ignored, so a failed
            # download fell through and crashed later in _load_*_examples.
            # Fail fast with the downloader's message instead.
            if not ret:
                print(tips)
                exit()
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))

        self._load_train_examples()
        self._load_test_examples()
        self._load_dev_examples()
github PaddlePaddle / PaddleHub / paddlehub / dataset / nlpcc_dbqa.py View on Github external
def __init__(self):
        """Load the NLPCC-DBQA dataset, downloading it to DATA_HOME on first use.

        Downloads and uncompresses the archive when the cache directory is
        missing, then loads the train, test and dev example splits.
        """
        self.dataset_dir = os.path.join(DATA_HOME, "nlpcc-dbqa")
        if not os.path.exists(self.dataset_dir):
            ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
                url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
            # Fix: the download result was previously ignored, so a failed
            # download fell through and crashed later in _load_*_examples.
            # Fail fast with the downloader's message instead.
            if not ret:
                print(tips)
                exit()
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))

        self._load_train_examples()
        self._load_test_examples()
        self._load_dev_examples()
github PaddlePaddle / PaddleHub / paddlehub / dataset / chnsenticorp.py View on Github external
def __init__(self):
        """Load the ChnSentiCorp dataset, downloading it to DATA_HOME on first use.

        Downloads and uncompresses the archive when the cache directory is
        missing, then loads the train, test and dev example splits.
        """
        self.dataset_dir = os.path.join(DATA_HOME, "chnsenticorp")
        if not os.path.exists(self.dataset_dir):
            ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
                url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
            # Fix: the download result was previously ignored, so a failed
            # download fell through and crashed later in _load_*_examples.
            # Fail fast with the downloader's message instead.
            if not ret:
                print(tips)
                exit()
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))

        self._load_train_examples()
        self._load_test_examples()
        self._load_dev_examples()
github PaddlePaddle / PaddleHub / paddlehub / finetune / evaluator / cmrc2018_evaluate.py View on Github external
def mixed_segmentation(in_str, rm_punc=False):
    """Segment *in_str* character-by-character for CMRC2018 evaluation.

    CJK characters and listed punctuation are handled per-character, while
    runs of other characters accumulate in ``temp_str`` (presumably flushed
    via an NLTK tokenizer — the remainder of this function is not visible
    in this excerpt; confirm against the full source).

    Args:
        in_str: Text to segment; coerced to a lowercased, stripped str.
        rm_punc: If True, characters in ``sp_char`` are dropped entirely.
    """
    # Ensure the NLTK "punkt" tokenizer data exists locally, downloading it
    # on first use, then register its root directory with nltk.
    nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data")
    tokenizers_path = os.path.join(nltk_path, "tokenizers")
    punkt_path = os.path.join(tokenizers_path, "punkt")
    if not os.path.exists(punkt_path):
        default_downloader.download_file_and_uncompress(
            url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True)
    nltk.data.path.append(nltk_path)

    in_str = str(in_str).lower().strip()
    segs_out = []
    temp_str = ""
    # Punctuation treated like CJK characters: split as single tokens
    # (or removed when rm_punc is set).
    sp_char = [
        '-', ':', '_', '*', '^', '/', '\\', '~', '`', '+', '=', ',', '。', ':',
        '?', '!', '“', '”', ';', '’', '《', '》', '……', '·', '、', '「', '」', '(',
        ')', '-', '~', '『', '』'
    ]
    for char in in_str:
        if rm_punc and char in sp_char:
            continue
        # \u4e00-\u9fa5 matches CJK Unified Ideographs: flush any pending
        # non-CJK run before emitting this character on its own.
        if re.search(r'[\u4e00-\u9fa5]', char) or char in sp_char:
            if temp_str != "":
github PaddlePaddle / PaddleHub / paddlehub / dataset / msra_ner.py View on Github external
def __init__(self):
        """Load the MSRA-NER dataset, downloading it to DATA_HOME on first use.

        Downloads and uncompresses the archive when the cache directory is
        missing, then loads the train, test and dev example splits.
        """
        self.dataset_dir = os.path.join(DATA_HOME, "msra_ner")
        if not os.path.exists(self.dataset_dir):
            ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
                url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
            # Fix: the download result was previously ignored, so a failed
            # download fell through and crashed later in _load_*_examples.
            # Fail fast with the downloader's message instead.
            if not ret:
                print(tips)
                exit()
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))

        self._load_train_examples()
        self._load_test_examples()
        self._load_dev_examples()
github PaddlePaddle / PaddleHub / paddlehub / dataset / squad.py View on Github external
def __init__(self, version_2_with_negative=False):
        """Load the SQuAD dataset, downloading it to DATA_HOME on first use.

        Args:
            version_2_with_negative: If True, load with SQuAD 2.0 semantics
                (questions may have no answer).
        """
        self.dataset_dir = os.path.join(DATA_HOME, "squad_data")
        if not os.path.exists(self.dataset_dir):
            ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
                url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
            # Fix: the download result was previously ignored, so a failed
            # download fell through and crashed later in _load_*_examples.
            # Fail fast with the downloader's message instead.
            if not ret:
                print(tips)
                exit()
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))
        self.version_2_with_negative = version_2_with_negative
        self._load_train_examples(version_2_with_negative, if_has_answer=True)
        self._load_dev_examples(version_2_with_negative, if_has_answer=True)