How to use the parlai.core.build_data.DownloadableFile class in ParlAI

To help you get started, we’ve selected a few ParlAI examples that show popular ways DownloadableFile is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github facebookresearch / ParlAI / parlai / tasks / triviaqa / build.py View on Github external
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Downloadable resources for the TriviaQA task (a single tar.gz archive).
# The entry is built from, in order: the download URL, a file name matching
# the URL's last path component, and a 64-character hex string.
# NOTE(review): the hex string is presumably the SHA-256 checksum of the
# archive -- confirm against DownloadableFile.
RESOURCES = [
    DownloadableFile(
        'http://nlp.cs.washington.edu/triviaqa/data/triviaqa-rc.tar.gz',
        'triviaqa-rc.tar.gz',
        'ef94fac6db0541e5bb5b27020d067a8b13b1c1ffc52717e836832e02aaed87b9',
    )
]


def build(opt):
    dpath = os.path.join(opt['datapath'], 'TriviaQA')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
github facebookresearch / ParlAI / parlai / tasks / hotpotqa / build.py View on Github external
import os
import json

# Dataset version string that is interpolated into the file names below.
VERSION = '1'
# With VERSION == '1' these expand to 'hotpot_train_v1.1.json',
# 'hotpot_dev_distractor_v1.json' and 'hotpot_dev_fullwiki_v1.json'.
TRAIN_FILENAME = 'hotpot_train_v{}.1.json'.format(VERSION)
DEV_DISTRACTOR_FILENAME = 'hotpot_dev_distractor_v{}.json'.format(VERSION)
DEV_FULLWIKI_FILENAME = 'hotpot_dev_fullwiki_v{}.json'.format(VERSION)

# Downloadable resources for the HotpotQA task: one train file and two dev
# files. Each entry is built from the download URL, a file name, a
# 64-character hex string and zipped=False.
# NOTE(review): the URLs and file names here are hard-coded literals that
# happen to equal the *_FILENAME constants above; changing VERSION alone will
# not change what is listed here.
# NOTE(review): the hex strings are presumably SHA-256 checksums, and
# zipped=False presumably means the file is not an archive to unpack --
# confirm both against DownloadableFile.
RESOURCES = [
    DownloadableFile(
        'http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_train_v1.1.json',
        'hotpot_train_v1.1.json',
        '26650cf50234ef5fb2e664ed70bbecdfd87815e6bffc257e068efea5cf7cd316',
        zipped=False,
    ),
    DownloadableFile(
        'http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_distractor_v1.json',
        'hotpot_dev_distractor_v1.json',
        '4e9ecb5c8d3b719f624d66b60f8d56bf227f03914f5f0753d6fa1b359d7104ea',
        zipped=False,
    ),
    DownloadableFile(
        'http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json',
        'hotpot_dev_fullwiki_v1.json',
        '2f1f3e594a3066a3084cc57950ca2713c24712adaad03af6ccce18d1846d5618',
        zipped=False,
    ),
]

# Template with {context_question} and {answer} placeholders. The two adjacent
# string literals are concatenated by Python into a single string, so the
# 'text:' and 'labels:' fields are separated by one tab character.
OUTPUT_FORMAT = 'text:{context_question}\t' 'labels:{answer}'
github facebookresearch / ParlAI / parlai / tasks / squad2 / build.py View on Github external
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Downloadable resources for the SQuAD 2.0 task: the train and dev JSON files.
# Each entry is built from the download URL, a file name matching the URL's
# last path component, a 64-character hex string and zipped=False.
# NOTE(review): the hex strings are presumably SHA-256 checksums, and
# zipped=False presumably means the file is not an archive to unpack --
# confirm both against DownloadableFile.
RESOURCES = [
    DownloadableFile(
        'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json',
        'train-v2.0.json',
        '68dcfbb971bd3e96d5b46c7177b16c1a4e7d4bdef19fb204502738552dede002',
        zipped=False,
    ),
    DownloadableFile(
        'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json',
        'dev-v2.0.json',
        '80a5225e94905956a6446d296ca1093975c4d3b3260f1d6c8f68bc2ab77182d8',
        zipped=False,
    ),
]


def build(opt):
    dpath = os.path.join(opt['datapath'], 'SQuAD2')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
github facebookresearch / ParlAI / parlai / tasks / dialogue_safety / build.py View on Github external
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data

# Downloadable resources for the dialogue_safety task: the single-turn and
# multi-turn safety JSON files. Each entry is built from the download URL, a
# file name matching the URL's last path component, a 64-character hex string
# and zipped=False.
# NOTE(review): the hex strings are presumably SHA-256 checksums, and
# zipped=False presumably means the file is not an archive to unpack --
# confirm both against DownloadableFile.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/dialogue_safety/single_turn_safety.json',
        'single_turn_safety.json',
        'f3a46265aa639cfa4b55d2be4dca4be1c596acb5e8f94d7e0041e1a54cedd4cd',
        zipped=False,
    ),
    DownloadableFile(
        'http://parl.ai/downloads/dialogue_safety/multi_turn_safety.json',
        'multi_turn_safety.json',
        'e3e577f456d63d51eb7b5f98ffd251ad695476f186d422fa8de1a177742fa7b6',
        zipped=False,
    ),
]


def build(datapath):
    version = 'v1.0'
    dpath = os.path.join(datapath, 'dialogue_safety')

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
github facebookresearch / ParlAI / parlai / tasks / vqa_v1 / build.py View on Github external
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

RESOURCES = [
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Train_mscoco.zip',
        'Questions_Train_mscoco.zip',
        'c3b2bb6155528eeae95e0a914af394d6f0d98f8f2b51012c44b27778e1a96707',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Val_mscoco.zip',
        'Questions_Val_mscoco.zip',
        'e8839be5de2d711989bf0adc82e6717d1ce307d27c9b1dfb0abf413b79a5d4d0',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Test_mscoco.zip',
        'Questions_Test_mscoco.zip',
        'bd080c297fc863bf8258caa4864d3b5afab29373375a6637f8546338291e28c0',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Val_mscoco.zip',
        'Annotations_Val_mscoco.zip',
        '29377c35186d90aeab3e61bdad890f51215d1f88b700bd22ef19004d73bf284f',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Train_mscoco.zip',
github facebookresearch / ParlAI / parlai / tasks / convai2_wild_evaluation / build.py View on Github external
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import os
import json
from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data

# Downloadable resources for the convai2_wild_evaluation task (a single .tgz
# archive). The entry is built from, in order: the download URL, a file name
# matching the URL's last path component, and a 64-character hex string.
# NOTE(review): the hex string is presumably the SHA-256 checksum of the
# archive -- confirm against DownloadableFile.
RESOURCES = [
    DownloadableFile(
        'http://lnsigo.mipt.ru/export/datasets/convai/convai2_wild_evaluation_0.2.tgz',
        'convai2_wild_evaluation_0.2.tgz',
        'd40ff70275c8d1939a8081707edcf4e71072097d18b9998100a1099d23e29801',
    )
]


def make_parlai_format(data: list, dpath: str):
    train_p = 0.6
    valid_p = 0.2
    test_p = 1 - (train_p + valid_p)

    assert train_p > 0
    assert valid_p > 0
    assert test_p > 0
github facebookresearch / ParlAI / parlai / tasks / coco_caption / build_2017.py View on Github external
DownloadableFile(
        'http://parl.ai/downloads/COCO-IMG/train2017.zip',
        'train2017.zip',
        '69a8bb58ea5f8f99d24875f21416de2e9ded3178e903f1f7603e283b9e06d929',
    ),
    DownloadableFile(
        'http://parl.ai/downloads/COCO-IMG/val2017.zip',
        'val2017.zip',
        '4f7e2ccb2866ec5041993c9cf2a952bbed69647b115d0f74da7ce8f4bef82f05',
    ),
    DownloadableFile(
        'http://parl.ai/downloads/COCO-IMG/test2017.zip',
        'test2017.zip',
        'c7908c3c9f94ba2f3340ebbeec58c25db6be8774f18d68c2f15d0e369d95baba',
    ),
    DownloadableFile(
        'http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
        'annotations_trainval2017.zip',
        '113a836d90195ee1f884e704da6304dfaaecff1f023f49b6ca93c4aaae470268',
    ),
    DownloadableFile(
        'http://images.cocodataset.org/annotations/image_info_test2017.zip',
        'image_info_test2017.zip',
        'e52f412dd7195ac8f98d782b44c6dd30ea10241e9f42521f67610fbe055a74f8',
    ),
]


def buildImage(opt):
    dpath = os.path.join(opt['datapath'], 'COCO-IMG-2017')
    version = '1'
github facebookresearch / ParlAI / parlai / tasks / cnn_dm / build.py View on Github external
'ad69010002210b7c406718248ee66e65868b9f6820f163aa966369878d14147e',
        from_google=True,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_training_urls.txt',
        'cnn_wayback_training_urls.txt',
        'e074c2245c475b00c455cefb911e0066b27fe17085dd0c773101e10d3088583b',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_validation_urls.txt',
        'cnn_wayback_validation_urls.txt',
        'b1ae81ff058ca640da3ae2b3c98fefca3adfea358736b6e29efc2ec1cbef5b5c',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_test_urls.txt',
        'cnn_wayback_test_urls.txt',
        'a0796c3c7812e3c9fcb1a65faa9aee7bb6f8a3869e953c7f61b401790c0a6f33',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_training_urls.txt',
        'dailymail_wayback_training_urls.txt',
        '3913d6a90c29a81196128346d81c28d6c7f7e91777d886e8417163ce83b2a04a',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_validation_urls.txt',
        'dailymail_wayback_validation_urls.txt',
        '2377b8f809bd07b143bbbd9e60594d10e7b8a211c8a5672181ea6000bbf548a2',
        zipped=False,
github facebookresearch / ParlAI / parlai / tasks / cnn_dm / build.py View on Github external
'e074c2245c475b00c455cefb911e0066b27fe17085dd0c773101e10d3088583b',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_validation_urls.txt',
        'cnn_wayback_validation_urls.txt',
        'b1ae81ff058ca640da3ae2b3c98fefca3adfea358736b6e29efc2ec1cbef5b5c',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_test_urls.txt',
        'cnn_wayback_test_urls.txt',
        'a0796c3c7812e3c9fcb1a65faa9aee7bb6f8a3869e953c7f61b401790c0a6f33',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_training_urls.txt',
        'dailymail_wayback_training_urls.txt',
        '3913d6a90c29a81196128346d81c28d6c7f7e91777d886e8417163ce83b2a04a',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_validation_urls.txt',
        'dailymail_wayback_validation_urls.txt',
        '2377b8f809bd07b143bbbd9e60594d10e7b8a211c8a5672181ea6000bbf548a2',
        zipped=False,
    ),
    DownloadableFile(
        'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_test_urls.txt',
        'dailymail_wayback_test_urls.txt',
        '554d18fc79a06a16902662d926cb7cc981ea36a3f82d5ae1426e25bf62f65b87',
        zipped=False,
github facebookresearch / ParlAI / parlai / tasks / vqa_v1 / build.py View on Github external
DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Val_mscoco.zip',
        'Questions_Val_mscoco.zip',
        'e8839be5de2d711989bf0adc82e6717d1ce307d27c9b1dfb0abf413b79a5d4d0',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Test_mscoco.zip',
        'Questions_Test_mscoco.zip',
        'bd080c297fc863bf8258caa4864d3b5afab29373375a6637f8546338291e28c0',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Val_mscoco.zip',
        'Annotations_Val_mscoco.zip',
        '29377c35186d90aeab3e61bdad890f51215d1f88b700bd22ef19004d73bf284f',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Train_mscoco.zip',
        'Annotations_Train_mscoco.zip',
        'a5f5f97c162a4ad44896be08bac6deaa258aa3fec281afcc84fe85ae44cb1ebc',
    ),
]


def build(opt):
    dpath = os.path.join(opt['datapath'], 'VQA-v1')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)