How to use the parlai.core.build_data.make_dir function in parlai

To help you get started, we’ve selected a few parlai examples that show popular ways build_data.make_dir is used in public projects.
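make_dir creates the given directory together with any missing parent directories, and it is safe to call when the directory already exists (it behaves like mkdir -p). A minimal standalone sketch; the data root below is a hypothetical path used only for illustration:

import os

from parlai.core import build_data

# Hypothetical data root, for illustration only.
dpath = os.path.join('/tmp/parlai_data', 'MyTask')

# Creates /tmp/parlai_data and /tmp/parlai_data/MyTask as needed.
build_data.make_dir(dpath)

# Idempotent: calling it again on an existing directory is not an error.
build_data.make_dir(dpath)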

Example from facebookresearch/ParlAI: parlai/core/image_featurizers.py (view on GitHub)
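            # work out where cached features for an image stored inside a zip archive should live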
            task = opt['image_load_task']
            prepath = os.path.join(opt['datapath'], task)
            imagefn = ''.join(zipname.strip('.zip').split('/')[-2:]) + path.name
        if mode == 'raw':
            # raw just returns RGB values
            return Image.open(path).convert('RGB')
        elif mode == 'ascii':
            # convert images to ascii ¯\_(ツ)_/¯
            return self._img_to_ascii(path)
        else:
            # otherwise, looks for preprocessed version under 'mode' directory
            if not is_zip:
                prepath, imagefn = os.path.split(path)
            dpath = os.path.join(prepath, mode)
            if not os.path.exists(dpath):
                build_data.make_dir(dpath)
            imagefn = imagefn.split('.')[0]
            new_path = os.path.join(prepath, mode, imagefn)
            if not os.path.isfile(new_path):
                return self.extract(Image.open(path).convert('RGB'), new_path)
            else:
                return self.torch.load(new_path)
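Here make_dir is used lazily: the first time an image is requested in a preprocessed mode, a cache directory named after that mode is created next to the raw image (or under the datapath for zipped images), the extracted features are saved there, and later calls simply load the cached tensor with torch.load.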
Example from facebookresearch/ParlAI: parlai/tasks/twitter/build.py (view on GitHub)
def build(opt):
    version = 'v1.1'
    dpath = os.path.join(opt['datapath'], 'Twitter')

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname1 = "twitter_en_big.txt.gz.partaa"
        fname2 = "twitter_en_big.txt.gz.partab"
        url = 'https://github.com/Marsan-Ma/chat_corpus/raw/master/'
        build_data.download(url + fname1, dpath, fname1)
        build_data.download(url + fname2, dpath, fname2)

        file1 = os.path.join(dpath, fname1)
        file2 = os.path.join(dpath, fname2)
        file3 = "twitter_en_big.txt.gz"
        outzipfile = os.path.join(dpath, file3)
        build_data.cat(file1, file2, outzipfile)

        import gzip
Example from facebookresearch/ParlAI: parlai/tasks/clevr/build.py (view on GitHub)
def build(opt):
    dpath = os.path.join(opt['datapath'], 'CLEVR')
    version = 'v1.0'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        # An older version exists, so remove these outdated files.
        if build_data.built(dpath):
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'CLEVR_v1.0.zip'
        url = 'https://dl.fbaipublicfiles.com/clevr/'

        build_data.download(url + fname, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
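The Twitter and CLEVR builders above (and most of the examples below) share the same lifecycle: check built, clear any stale version, make_dir, download, then mark_done. A condensed template of that pattern, assuming the same build_data helpers shown above; the task name and archive URL are hypothetical placeholders:

import os

from parlai.core import build_data

def build(opt):
    # Hypothetical task name and version, for illustration only.
    dpath = os.path.join(opt['datapath'], 'MyTask')
    version = 'v1.0'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove the outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Hypothetical archive location; substitute your dataset's URL.
        fname = 'mytask.tar.gz'
        build_data.download('https://example.com/data/' + fname, dpath, fname)
        build_data.untar(dpath, fname)

        # Record the version so future runs skip the rebuild.
        build_data.mark_done(dpath, version_string=version)

mark_done writes a marker file (including the version string) into dpath, which is what built consults on subsequent calls.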
Example from deepmipt/kpi2017: deeppavlov/agents/coreference/agents.py (view on GitHub)
"""
    Downloads required embeddings and chars dictionary for agent.
    Builds a folders tree.

    Args:
        opt: parameters from command line

    Returns:
        nothing
    """
    # get path to data directory and create folders tree
    dpath = opt['model_file']
    # define languages
    language = opt['language']
    dpath = join(dpath, language, 'agent')
    build_data.make_dir(dpath)
    
    build_data.make_dir(join(dpath, 'embeddings'))
    build_data.make_dir(join(dpath, 'vocab'))
    build_data.make_dir(join(dpath, 'logs', opt['name']))
    
    if not isfile(join(dpath, 'embeddings', 'embeddings_lenta_100.vec')):     
        print('[Downloading the word embeddings]...')
        try:
            embed_url = os.environ['EMBEDDINGS_URL'] + 'embeddings_lenta_100.vec'
            build_data.download(embed_url, join(dpath, 'embeddings'), 'embeddings_lenta_100.vec')
            print('[Finished downloading the word embeddings]')
        except RuntimeWarning:
            raise RuntimeError(
                'To use your own embeddings, put the file embeddings_lenta_100.vec '
                'in the folder {0}'.format(join(dpath, 'embeddings')))

    if not isfile(join(dpath, 'embeddings', 'ft_0.8.3_nltk_yalen_sg_300.bin')):
Example from facebookresearch/ParlAI: parlai/tasks/coqa/build.py (view on GitHub)
def build(opt):
    dpath = os.path.join(opt['datapath'], 'CoQA')
    version = VERSION

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        with open(os.path.join(dpath, 'coqa-train-v1.0.json')) as f:
            data = json.load(f)['data']
            make_parlai_format(dpath, 'train', data)

        with open(os.path.join(dpath, 'coqa-dev-v1.0.json')) as f:
            data = json.load(f)['data']
            make_parlai_format(dpath, 'valid', data)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
Example from facebookresearch/ParlAI: parlai/tasks/squad/build.py (view on GitHub)
        # Download the data.
        for downloadable_file in RESOURCES[:2]:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)

    if 'fulldoc' in opt['task']:
        dpath += '-fulldoc'
        if not build_data.built(dpath, version_string=version):
            print('[building data: ' + dpath + ']')
            if build_data.built(dpath):
                # An older version exists, so remove these outdated files.
                build_data.remove_dir(dpath)
            build_data.make_dir(dpath)

            # Download the data.
            RESOURCES[2].download_file(dpath)

            # Mark the data as built.
            build_data.mark_done(dpath, version_string=version)
Example from facebookresearch/ParlAI: parlai/tasks/convai2_wild_evaluation/build.py (view on GitHub)
def build(opt):
    version = '0.2'
    dpath = os.path.join(opt['datapath'], 'ConvAI2_wild_evaluation')

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')

        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        output_fname = 'convai2_wild_evaluation.json'
        output_path = os.path.join(dpath, output_fname)

        with open(output_path, 'r') as data_f:
            data = json.load(data_f)

        make_parlai_format(data, dpath)
        os.remove(output_path)

        # Mark the data as built.
        build_data.mark_done(dpath, version)
Example from facebookresearch/ParlAI: parlai/tasks/mnist_qa/build.py (view on GitHub)
def build(opt):
    dpath = os.path.join(opt['datapath'], 'mnist')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
Example from facebookresearch/ParlAI: parlai/tasks/dealnodeal/build.py (view on GitHub)
def build(opt):
    dpath = os.path.join(opt['datapath'], 'negotiation')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')

        # make a clean directory if needed
        if build_data.built(dpath):
            # an older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data from github
        fname = 'negotiation.zip'
        url = (
            'https://github.com/facebookresearch/end-to-end-negotiator/'
            'archive/master.zip'
        )
        print('[downloading data from: ' + url + ']')
        build_data.download(url, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark as done
        build_data.mark_done(dpath, version_string=version)
Example from deepmipt/kpi2017: deeppavlov/tasks/insults/build.py (view on GitHub)
        nothing
    """
    # get path to data directory
    dpath = os.path.join(opt['datapath'], 'insults')
    # define version if any
    version = '1.0'

    # check if data had been previously built
    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')

        # make a clean directory if needed
        if build_data.built(dpath):
            # an older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        raw_path = os.path.abspath(opt['raw_dataset_path'] or ".")
        train_file = os.path.join(raw_path, 'train.csv')
        valid_file = os.path.join(raw_path, 'test_with_solutions.csv')
        test_file = os.path.join(raw_path, 'impermium_verification_labels.csv')
        if not os.path.isfile(train_file) or not os.path.isfile(valid_file) or not os.path.isfile(test_file):
            ds_path = os.environ.get('DATASETS_URL')
            file_name = 'insults.tar.gz'
            if not ds_path:
                raise RuntimeError('Please download the dataset files from'
                                   ' https://www.kaggle.com/c/detecting-insults-in-social-commentary/data'
                                   ' and set the path to their directory in the raw-dataset-path parameter')
            print('Trying to download the insults dataset from the repository')
            url = urllib.parse.urljoin(ds_path, file_name)
            print(repr(url))
            build_data.download(url, dpath, file_name)