How to use the pathos.multiprocessing.ProcessPool class in pathos

To help you get started, we’ve selected a few ProcessPool examples, based on popular ways the class is used in public projects.

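Before the project snippets, here is a minimal, self-contained sketch of the typical ProcessPool lifecycle; the worker function and pool size are illustrative and not taken from any of the projects below:

from pathos.multiprocessing import ProcessPool

def square(x):
    return x * x

if __name__ == '__main__':
    pool = ProcessPool(nodes=4)            # start 4 worker processes
    results = pool.map(square, range(10))  # blocking map, preserves input order
    print(results)                         # [0, 1, 4, 9, ..., 81]
    pool.close()                           # stop accepting new tasks
    pool.join()                            # wait for the workers to finish
    pool.clear()                           # drop the cached pool object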

github ClimbsRocks / auto_ml / auto_ml / predictor.py View on Github
            category_length = relevant_X.shape[0]

            result = {
                'trained_category_model': category_trained_final_model
                , 'category': category
                , 'len_relevant_X': category_length
            }
            return result


        if os.environ.get('is_test_suite', False) == 'True':
            # If this is the test_suite, do not run things in parallel
            results = list(map(lambda x: train_one_categorical_model(x[0], x[1], x[2]), categories_and_data))
        else:

            pool = pathos.multiprocessing.ProcessPool()

            # Since we may have already closed the pool, try to restart it
            try:
                pool.restart()
            except AssertionError as e:
                pass

            try:
                results = list(pool.map(lambda x: train_one_categorical_model(x[0], x[1], x[2]), categories_and_data))
            except RuntimeError:
                # Deep Learning models require a ton of recursion. I've tried to work around it, but sometimes we just need to brute force the solution here
                original_recursion_limit = sys.getrecursionlimit()
                sys.setrecursionlimit(10000)
                results = list(pool.map(lambda x: train_one_categorical_model(x[0], x[1], x[2]), categories_and_data))
                sys.setrecursionlimit(original_recursion_limit)
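The try/except around pool.restart() above exists because pathos caches its pools: a later pathos.multiprocessing.ProcessPool() call can hand back a pool that was closed earlier, and restart() raises an AssertionError when the pool is not in a state it can restart from (which the excerpt simply ignores). A minimal sketch of that close-then-restart pattern, with an illustrative worker function:

from pathos.multiprocessing import ProcessPool

def double(x):
    return 2 * x

if __name__ == '__main__':
    pool = ProcessPool()
    print(pool.map(double, [1, 2, 3]))   # [2, 4, 6]
    pool.close()
    pool.join()

    # A second ProcessPool() with the same configuration may return the
    # closed pool, so try to revive it before mapping again.
    pool = ProcessPool()
    try:
        pool.restart()
    except AssertionError:
        pass                             # pool was still usable; nothing to revive
    print(pool.map(double, [4, 5, 6]))   # [8, 10, 12]
    pool.clear()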
github stanford-futuredata / optimus-maximus / benchmark / benchmark_fexipro.py View on Github
    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    run_args = []
    numa_queue = get_numa_queue()

    for (model_dir, (num_factors, num_users, num_items, _, scale),
         _) in TO_RUN:
        input_dir = os.path.join(MODEL_DIR_BASE, model_dir)
        base_name = model_dir.replace('/', '-')
        for K in TOP_K:
            run_args.append((numa_queue, num_factors, num_users, num_items, K,
                             scale, input_dir, base_name, output_dir, runner))

    pool = multiprocessing.ProcessPool(
        NUM_NUMA_NODES)  # Only run 4 jobs at once, since we have 4 NUMA nodes
    pool.map(run, run_args)
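Here ProcessPool's first positional argument is the worker count (the same thing as the nodes= keyword), so at most NUM_NUMA_NODES benchmark jobs run at once. A minimal sketch of the same capping idea, with a made-up task in place of the benchmark runner:

import os
from pathos.multiprocessing import ProcessPool

def run(job_id):
    return job_id * job_id                   # stand-in for one benchmark invocation

if __name__ == '__main__':
    workers = min(4, os.cpu_count() or 1)    # e.g. one worker per NUMA node
    pool = ProcessPool(workers)              # same as ProcessPool(nodes=workers)
    print(pool.map(run, range(12)))          # at most 'workers' tasks run concurrently
    pool.close()
    pool.join()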
github ClimbsRocks / auto_ml / auto_ml / utils_ensembling.py View on Github
        # Don't bother parallelizing if this is a single dictionary
        if X.shape[0] == 1:
            predictions_from_all_estimators = map(lambda predictor: get_predictions_for_one_estimator(predictor, X), self.ensemble_predictors)

        else:

            # Pathos doesn't like datasets beyond a certain size. So fall back on single, non-parallel predictions instead.
            # try:
            if os.environ.get('is_test_suite', False) == 'True':
                predictions_from_all_estimators = map(lambda predictor: get_predictions_for_one_estimator(predictor, X), self.ensemble_predictors)

            else:
                # Open a new multiprocessing pool
                pool = pathos.multiprocessing.ProcessPool()

                # Since we may have already closed the pool, try to restart it
                try:
                    pool.restart()
                except AssertionError as e:
                    pass
                predictions_from_all_estimators = pool.map(lambda predictor: get_predictions_for_one_estimator(predictor, X), self.ensemble_predictors)

                # Once we have gotten all we need from the pool, close it so it's not taking up unnecessary memory
                pool.close()
                try:
                    pool.join()
                except AssertionError:
                    pass

        predictions_from_all_estimators = list(predictions_from_all_estimators)
github hawkjo / freebarcodes / nucleaseq / FreeDivSphere.py View on Github
    def parallel_num_iterator(self, num_proc=None):
        nerr_tups = list(self._nsub_ndel_nins_iterator())
        def dna_nums_given_nerr_tup(nerr_tup):
            nsub, ndel, nins = nerr_tup
            return [seqtools.dna2num(seq)
                    for seq in self._freediv_subsphere_given_counts(nsub, ndel, nins)]

        pl = ProcessPool(num_proc)
        results = pl.map(dna_nums_given_nerr_tup, nerr_tups)
        for num in itertools.chain(*results):
            yield num
        pl._clear()
        del pl
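The pl._clear() call above reaches into a private method; pathos also exposes a public clear() (used in the p_tqdm snippet below) that discards the cached pool. When each task returns a sizeable list, imap yields the results lazily, in input order, instead of building the full list of lists first. A minimal sketch with a made-up task, not code from freebarcodes:

import itertools
from pathos.multiprocessing import ProcessPool

def expand(n):
    # stand-in for a task that returns a list of values
    return [n * 10 + k for k in range(3)]

if __name__ == '__main__':
    pool = ProcessPool(nodes=4)
    # imap yields each task's list as soon as it is ready, preserving input order
    for num in itertools.chain.from_iterable(pool.imap(expand, range(5))):
        print(num)
    pool.clear()   # public counterpart of the _clear() used above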
github snarkai / Hub / waymo_upload / waymo_upload.py View on Github
def upload_all():
    path = '/home/edward/waymo/training/'
    filenames = os.listdir(path)
    filenames.sort()
    pool = ProcessPool(16)
    data = pool.map(frames_tfrecord, map(lambda f: path + f, filenames))
    frames = sum(data, 0)
    print('Frames in files: {}, Total: {}'.format(data, frames))

    start_frame = []
    for i in range(0, frames):
        start_frame.append(sum(data[:i],0))
    dataset_type = 'training'
    version = 'v2'
    storage = S3(bucket='waymo-dataset-upload')
    labels_arr = hub.array(shape=(frames, 2, 6, 30, 7), chunk_size=(100, 2, 6, 30, 7), storage=storage, name='edward/{}-labels:{}'.format(dataset_type, version), backend='s3', dtype='float64')
    images_arr = hub.array(compression='jpeg', shape=(frames, 6, 1280, 1920, 3), storage=storage, name='edward/{}-camera-images:{}'.format(dataset_type, version), backend='s3', dtype='uint8', chunk_size=(1, 6, 1280, 1920, 3))    

    def upload_record(i):
        upload_tfrecord(dataset_type, path + filenames[i], version, start_frame[i])
github swansonk14 / p_tqdm / p_tqdm / p_tqdm.py View on Github
    # Extract num_cpus
    num_cpus = kwargs.pop('num_cpus', None)

    # Determine num_cpus
    if num_cpus is None:
        num_cpus = cpu_count()
    elif type(num_cpus) == float:
        num_cpus = int(round(num_cpus * cpu_count()))

    # Determine length of tqdm (equal to length of shortest iterable)
    length = min(len(iterable) for iterable in iterables if isinstance(iterable, Sized))

    # Create parallel generator
    map_type = 'imap' if ordered else 'uimap'
    pool = Pool(num_cpus)
    map_func = getattr(pool, map_type)

    for item in tqdm(map_func(function, *iterables), total=length, **kwargs):
        yield item

    pool.clear()
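p_tqdm picks between imap (ordered) and uimap (unordered) by name; both return lazy iterators over the results. A minimal sketch of the difference, with an illustrative function that is not part of p_tqdm:

import time
from pathos.multiprocessing import ProcessPool

def slow_square(x):
    time.sleep((5 - x) * 0.05)   # earlier inputs take longer to finish
    return x * x

if __name__ == '__main__':
    pool = ProcessPool(nodes=4)
    print(list(pool.imap(slow_square, range(5))))    # input order: [0, 1, 4, 9, 16]
    print(list(pool.uimap(slow_square, range(5))))   # completion order, whichever finishes first
    pool.clear()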
github christophmark / bayesloop / bayesloop / core.py View on Github
            if not silent:
                print('+ Started new fit.')
                print('    + {} analyses to run.'.format(len(self.hyperGridValues)))

            # check if multiprocessing is available
            if nJobs > 1:
                try:
                    from pathos.multiprocessing import ProcessPool
                except ImportError:
                    raise ImportError('No module named pathos.multiprocessing. This module represents an optional '
                                      'dependency of bayesloop and is therefore not installed alongside bayesloop.')

                # prepare parallel execution
                if not silent:
                    print('    + Creating {} processes.'.format(nJobs))
                pool = ProcessPool(nodes=nJobs)

                # use parallelFit method to create copies of this HyperStudy instance with only partial
                # hyper-grid values
                subStudies = pool.map(self._parallelFit,
                                      range(nJobs),
                                      [nJobs]*nJobs,
                                      [forwardOnly]*nJobs,
                                      [evidenceOnly]*nJobs,
                                      [silent]*nJobs)

                # prevent memory pile-up in main process
                pool.close()
                pool.join()
                pool.terminate()
                pool.restart()
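bayesloop's call passes several iterables to pool.map, which zips them positionally like the builtin map, so each worker call receives one element from each list; a constant argument is broadcast by repeating it nJobs times. A small illustration with made-up arguments:

from pathos.multiprocessing import ProcessPool

def scale(value, factor):
    return value * factor

if __name__ == '__main__':
    n = 4
    pool = ProcessPool(nodes=n)
    # Like map(scale, values, factors): one (value, factor) pair per call
    print(pool.map(scale, range(n), [10] * n))   # [0, 10, 20, 30]
    pool.close()
    pool.join()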
github snarkai / Hub / waymo_upload / waymo_upload_2.py View on Github
def main():
    path = '/home/edward/waymo/training/'
    dataset_type = 'training'
    version = 'v2'
    filenames = os.listdir(path)
    filenames.sort()
    pool = ProcessPool(16)
    frame_count_arr = pool.map(frames_tfrecord, map(lambda f: path + f, filenames))
    frames = sum(frame_count_arr, 0)

    str_lasers_range_image = 'edward/{}-lasers-range-image:{}'.format(dataset_type, version)
    str_lasers_range_image_first = 'edward/{}-lasers-range-image-first:{}'.format(dataset_type, version)
    str_lasers_camera_proj = 'edward/{}-lasers-camera-proj:{}'.format(dataset_type, version)
    str_lasers_camera_proj_first = 'edward/{}-lasers-camera-proj-first:{}'.format(dataset_type, version)
    storage = S3(bucket='waymo-dataset-upload')
    hub.array(shape=(frames, 4, 2, 200, 600, 4), dtype='float32', backend='s3', storage=storage, name=str_lasers_range_image, chunk_size=(1, 4, 2, 200, 600, 4))
    hub.array(shape=(frames, 4, 2, 200, 600, 6), dtype='int32', backend='s3', storage=storage, name=str_lasers_camera_proj, chunk_size=(1, 4, 2, 200, 600, 6))
    hub.array(shape=(frames, 2, 64, 2650, 4), dtype='float32', backend='s3', storage=storage, name=str_lasers_range_image_first, chunk_size=(1, 2, 64, 2650, 4))
    hub.array(shape=(frames, 2, 64, 2650, 6), dtype='int32', backend='s3', storage=storage, name=str_lasers_camera_proj_first, chunk_size=(1, 2, 64, 2650, 6))

    start_frame_arr = []
    for i in range(0, len(filenames)):
        start_frame_arr.append(sum(frame_count_arr[:i], 0))