How to use the parlai.core.build_data.mark_done function in parlai

To help you get started, we’ve selected a few parlai examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Example from github.com/facebookresearch/ParlAI — parlai/tasks/dialogue_qe/build.py (view on GitHub)
fname = 'data_' + version + '.tar.gz'
        url = 'https://raw.githubusercontent.com/deepmipt/turing-data/master/' + fname
        build_data.download(url, data_path, fname)
        build_data.untar(data_path, fname)

        os.rename(
            os.path.join(data_path, 'data_train_' + version + '.json'),
            os.path.join(data_path, 'train.json'),
        )
        os.rename(
            os.path.join(data_path, 'data_test_' + version + '.json'),
            os.path.join(data_path, 'test.json'),
        )

        build_data.mark_done(data_path, version_string=version)
Example from github.com/deepmipt/kpi2017 — deeppavlov/tasks/insults/build.py (view on GitHub)
print('Preprocessing test')
        test_data['Comment'] = data_preprocessing(test_data['Comment'])

        print('Writing input files for fasttext')
        write_input_fasttext_cls(train_data, os.path.join(dpath, 'train'), 'train')
        write_input_fasttext_cls(test_data, os.path.join(dpath, 'test'), 'test')

        write_input_fasttext_emb(train_data, os.path.join(dpath, 'train'), 'train')
        write_input_fasttext_emb(test_data, os.path.join(dpath, 'test'), 'test')

        print('Writing input normalized input files')
        train_data.to_csv(os.path.join(dpath, 'train.csv'), index=False)
        test_data.to_csv(os.path.join(dpath, 'test.csv'), index=False)

        # mark the data as built
        build_data.mark_done(dpath, version_string=version)
Example from github.com/facebookresearch/ParlAI — projects/controllable_dialogue/tasks/build.py (view on GitHub)
build_data.download(URL_ROOT + fname_data, dpath, fname_data)
        build_data.untar(dpath, fname_data)

        # next download the wordstats files
        fname_wordstats = 'wordstats_v1.tar.gz'
        build_data.download(URL_ROOT + fname_wordstats, dpath, fname_wordstats)
        build_data.untar(dpath, fname_wordstats)

        # next download the evaluation logs
        fname_evallogs = 'evaluationlogs_v1.tar.gz'
        build_data.download(URL_ROOT + fname_evallogs, dpath, fname_evallogs)
        build_data.untar(dpath, fname_evallogs)

        print("Data has been placed in " + dpath)

        build_data.mark_done(dpath, version)
Example from github.com/facebookresearch/ParlAI — parlai/tasks/personality_captions/download_images.py (view on GitHub)
dts = ['train', 'val', 'test']
    if task == 'image_chat':
        dts[1] = 'valid'
    for dt in dts:
        with open(os.path.join(dpath, '{}.json'.format(dt))) as f:
            data = json.load(f)
            hashes += [d['image_hash'] for d in data]
    os.makedirs(image_path, exist_ok=True)

    print('[downloading images to {}]'.format(image_path))
    for _, (p_hash) in enumerate(tqdm.tqdm(hashes, unit='img')):
        image_url = '{}/{}/{}/{}.jpg'.format(
            image_prefix, p_hash[:3], p_hash[3:6], p_hash
        )
        download(image_url, image_path, '{}.jpg'.format(p_hash))
    build_data.mark_done(image_path, version)
Example from github.com/facebookresearch/ParlAI — parlai/tasks/flickr30k/build.py (view on GitHub)
dpath = os.path.join(opt['datapath'], 'Flickr30k')
    version = '1.0'

    if not build_data.built(dpath, version_string=version):
        print('[building image data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
Example from github.com/facebookresearch/ParlAI — projects/memnn_feedback/tasks/dbll_babi/build.py (view on GitHub)
if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'dbll.tgz'
        url = 'https://s3.amazonaws.com/fair-data/parlai/dbll/' + fname
        build_data.download(url, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
Example from github.com/facebookresearch/ParlAI — parlai/tasks/copa/build.py (view on GitHub)
version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')

        if build_data.built(dpath):
            # an older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # mark the data as built
        build_data.mark_done(dpath, version_string=version)
Example from github.com/facebookresearch/ParlAI — parlai/tasks/ubuntu/build.py (view on GitHub)
dpath = os.path.join(opt['datapath'], 'Ubuntu')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
Example from github.com/facebookresearch/ParlAI — parlai/tasks/ms_marco/build.py (view on GitHub)
build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        create_fb_format(dpath, "train", os.path.join(dpath, 'train.gz'))
        # os.remove(os.path.join(dpath, 'train.gz'))
        create_fb_format(dpath, "valid", os.path.join(dpath, 'valid.gz'))
        # os.remove(os.path.join(dpath, 'valid.gz'))
        create_fb_format(dpath, "test", os.path.join(dpath, 'test.gz'))
        # os.remove(os.path.join(dpath, 'test.gz'))

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
Example from github.com/facebookresearch/ParlAI — parlai/tasks/self_feeding/build.py (view on GitHub)
def build(opt):
    """Download and unpack the self_feeding dataset under opt['datapath'].

    A guard at the top makes the function idempotent: if the current
    version has already been marked done, nothing happens. A stale build
    of an older version is removed before the fresh download.
    """
    version = '3.1'
    archive = 'self_feeding_v031.tar.gz'
    dpath = os.path.join(opt['datapath'], 'self_feeding')

    if build_data.built(dpath, version):
        # Current version already present — nothing to do.
        return

    print('[building data: ' + dpath + ']')
    if build_data.built(dpath):
        # An older version exists, so remove these outdated files.
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Fetch and extract the archive, then record the build as complete.
    build_data.download('http://parl.ai/downloads/self_feeding/' + archive, dpath, archive)
    build_data.untar(dpath, archive)
    build_data.mark_done(dpath, version)