How to use the joblib.parallel_backend function in joblib

To help you get started, we've selected a few joblib.parallel_backend examples based on popular ways it is used in public projects.

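As a quick orientation before the project snippets, here is a minimal sketch of the core pattern (not taken from any of the projects below): parallel_backend is a context manager that selects a backend, and any Parallel call inside the block inherits it, including the n_jobs default.

from math import sqrt
from joblib import Parallel, delayed, parallel_backend

# Select the thread-based backend (and a default n_jobs) for the block.
with parallel_backend('threading', n_jobs=4):
    results = Parallel()(delayed(sqrt)(i) for i in range(10))

print(results)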

github shevisjohnson / gpt-2_bot / reddit_bot.py
all_arr = [self.reddit().subreddit(subr) for subr in subrs]
n_threads = len(all_arr)
self.log("\nDeploying " + str(n_threads) + " threads")

# Register each subreddit stream with the selector.
ctr = 0
for subr in all_arr:
    self.sel.register(AugComStream(subr, ctr, skip_existing=True))
    ctr += 1

def deploy_stream(self, subr):
    # Each stream gets its own inner thread pool for handling comments.
    with parallel_backend('threading', n_jobs=32):
        Parallel()(delayed(self.do_work)(comment)
                   for comment in subr.stream.comments(skip_existing=True))

# Outer pool: one thread per subreddit stream.
with parallel_backend('threading', n_jobs=n_threads):
    Parallel()(delayed(deploy_stream)(self, subr) for subr in tqdm.tqdm(all_arr))

self.log("\nMAIN THREAD DONE!!!\n\n============================================================\n")
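The snippet nests two levels of the threading backend: an outer pool fans out over subreddit streams, and each worker opens its own inner pool. A stripped-down sketch of that shape, with placeholder stream and item functions:

from joblib import Parallel, delayed, parallel_backend

def handle_item(item):
    # Placeholder for the per-comment work.
    return item * 2

def handle_stream(stream):
    # Inner pool: parallelism within a single stream.
    with parallel_backend('threading', n_jobs=4):
        return Parallel()(delayed(handle_item)(x) for x in stream)

streams = [range(5), range(5, 10)]
# Outer pool: one thread per stream.
with parallel_backend('threading', n_jobs=len(streams)):
    results = Parallel()(delayed(handle_stream)(s) for s in streams)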
github csc-training / geocomputing / machineLearning / shallows / EXTRA_classificationParallel.py
        logging.warning('If you have firewall limitations and do not have the '
                        'rights to modify its settings, set "processes=False" '
                        'in the Client object defined above; this changes the '
                        'parallelization mode from process-based to '
                        'thread-based.\n')

        time.sleep(1)

    except Exception as ex:
        print(ex)
    else:
        # Consider the thread-based configuration below if you are running the
        # code on an old machine.
        # Initialize a thread-based local client for distributed computation,
        # i.e. 1 process with 5 threads:
        # client = Client(processes=False, threads_per_worker=5,
        #                 n_workers=1, memory_limit='3GB')
        with parallel_backend('dask'):
            # Extract training points and their labels from a CSV file.
            xys, classes = extract_training_samples(sampleCoordinates)
            # Convert world coordinates to the image coordinate system.
            cols, rows = calculate_image_offsets(images[1], xys)
            # Stack the bands on top of each other like cake layers.
            data = stack_bands(images)
            # Convert the offsets from float to int so they can be used for slicing.
            cols = list(map(int, cols))
            rows = list(map(int, rows))
            # Sample the satellite data at the coordinates extracted from the CSV.
            sample = data[rows, cols, :]
            x_train, x_test, y_train, y_test = train_test_split(sample, classes, test_size=0.2, random_state=63)
            # Initialize a random forest and wrap it in a meta-estimator
            # for parallel prediction.
            model = ParallelPostFit(estimator=RandomForestClassifier(n_estimators=trees))
            # Alternative: a single decision tree classifier.
            # model = tree.DecisionTreeClassifier(max_depth=5)
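The 'dask' backend used above only works when a dask.distributed Client has already been created; joblib then ships its tasks to that client's workers. A minimal local sketch of the setup (the squaring task is just a placeholder):

from dask.distributed import Client
from joblib import Parallel, delayed, parallel_backend

client = Client(processes=False)  # local, thread-based cluster

with parallel_backend('dask'):
    # joblib submits these tasks to the dask workers.
    squares = Parallel(verbose=1)(delayed(pow)(i, 2) for i in range(8))

print(squares)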
github MateLabs / AutoOut / app / outlier_treatment / main.py
            # sklearn convention: -1 marks an outlier; remap to 1 (outlier) / 0 (inlier).
            y_predicted = list(map(lambda x: 1 if x == -1 else 0, y_predicted))

        elif space['model'] == "IsolationForest":
            model = IsolationForest(**params)
            with parallel_backend('threading'):
                y_predicted = model.fit_predict(X)
            y_predicted = list(map(lambda x: 1 if x == -1 else 0, y_predicted))

        elif space['model'] == "OneClassSVM":
            model = OneClassSVM(**params)
            y_predicted = model.fit_predict(X)
            y_predicted = list(map(lambda x: 1 if x == -1 else 0, y_predicted))

        elif space['model'] == "LocalOutlierFactor":
            model = LocalOutlierFactor(**params)
            with parallel_backend('threading'):
                y_predicted = model.fit_predict(X)
            y_predicted = list(map(lambda x: 1 if x == -1 else 0, y_predicted))

        elif space['model'] == "zscore":
            model = ZScore(threshold=params['threshold'])
            y_predicted = model.fit_predict(X)

    except Exception as e:
        print("Error:", e)
        # Fall back to "no outliers" and record the failure.
        y_predicted = [0] * X.shape[0]
        error['detect_' + str(space)] = e

    if isinstance(y_predicted, list):
        y_predicted = np.array(y_predicted)

    time_taken = time.time() - start_time
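Wrapping an estimator's fit_predict in parallel_backend('threading') redirects the estimator's internal joblib calls (anything it parallelizes via n_jobs) to threads rather than worker processes. A self-contained sketch with random data:

import numpy as np
from joblib import parallel_backend
from sklearn.ensemble import IsolationForest

X = np.random.RandomState(0).randn(200, 5)
model = IsolationForest(n_estimators=50, n_jobs=-1, random_state=0)

# Inside the block, the forest's trees are fitted on threads, so X is
# shared in memory instead of being pickled to worker processes.
with parallel_backend('threading'):
    labels = model.fit_predict(X)

flags = (labels == -1).astype(int)  # remap to 1 = outlier, 0 = inlier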
github joblib / joblib / examples / serialization_and_wrappers.py
# If you are on a UNIX system, it is possible to fall back to the old
# ``multiprocessing`` backend, which can pickle interactively defined functions
# with the default pickle module. This is faster for such large objects.
#

if sys.platform != 'win32':
    if IS_RUN_WITH_SPHINX_GALLERY:
        # When this example is run with sphinx gallery, it breaks the pickling
        # capability of the multiprocessing backend, so we have to modify the
        # way we define our functions. This has nothing to do with the example.
        from utils import func_async
    else:
        def func_async(i, *args):
            return 2 * i

    with parallel_backend('multiprocessing'):
        t_start = time.time()
        Parallel(n_jobs=2)(
            delayed(func_async)(21, large_list) for _ in range(1))
        print("With multiprocessing backend and pickle serialization: {:.3f}s"
              .format(time.time() - t_start))


###############################################################################
# However, using ``fork`` to start new processes can violate the POSIX
# specification and can interact badly with compiled extensions that use
# ``openmp``. Also, it is not possible to start processes with ``fork`` on
# Windows, where only ``spawn`` is available. The ``loky`` backend has been
# developed to mitigate these issues.
#
# To have fast pickling with ``loky``, it is possible to rely on ``pickle`` to
# serialize all communications between the main process and the workers with
# the ``loky`` backend. This can be done by setting the environment variable
# ``LOKY_PICKLER=pickle`` before the script is launched.
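For comparison, a sketch of the same call on the default ``loky`` backend, reusing ``func_async`` and ``large_list`` from this example; loky starts workers with a spawn-like method and therefore also works on Windows:

with parallel_backend('loky'):
    t_start = time.time()
    Parallel(n_jobs=2)(
        delayed(func_async)(21, large_list) for _ in range(1))
    print("With loky backend and cloudpickle serialization: {:.3f}s"
          .format(time.time() - t_start))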
github iaroslav-ai / scikit-optimize-benchmarks / distributed_run.py
dask_scheduler = "34.228.190.163:8786"  # example scheduler address

res_name = ''.join([random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(15)]) + ".py2.bin"

def run():
    r = parallel_evaluate(
        solvers=[spearmint_minimize, ],
        task_subset=Sphere_7_ri,  # automatically selects all tasks
        n_reps=1,
        eval_kwargs={'n_calls': 6},
        joblib_kwargs={'n_jobs': 1, 'verbose': 1000})
    # it is a good idea to cache results
    pc.dump(r, open(res_name, 'wb'))

if run_with_dask:
    # 'dask.distributed' is the backend name registered by older releases of
    # distributed; recent joblib versions use parallel_backend('dask') with a
    # Client instead.
    with parallel_backend('dask.distributed', scheduler_host=dask_scheduler):
        run()
else:
    run()

# Load cached results:
# r = pc.load(open(res_name, 'rb'))
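On recent joblib and distributed versions, the equivalent would be to connect a Client and select the built-in 'dask' backend; a sketch reusing dask_scheduler and run() from the snippet above:

from dask.distributed import Client
from joblib import parallel_backend

client = Client(dask_scheduler)  # reuses the scheduler address defined above

with parallel_backend('dask'):
    run()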
github civisanalytics / civis-python / civis / run_joblib_func.py
        # Graceful nested context managers are ~hard across python versions;
        # this just works...
        if NO_SKLEARN:
            with _joblib_para_backend(_backend):
                result = func()
        else:
            # We are using the nested context managers to set the joblib
            # backend to the requested one in both copies of joblib: the
            # standalone package and the copy shipped by sklearn at
            # `sklearn.externals.joblib`. joblib maintains the current
            # backend as global state in the package, and thus there are
            # two backends to set when you have two copies of the package
            # in play.
            with _sklearn_para_backend(_backend):
                with _joblib_para_backend(_backend):
                    result = func()
    except Exception:
        print("Error! Attempting to record exception.")
        # Wrap the exception in joblib's TransportableException
        # so that joblib can properly display the results.
        e_type, e_value, e_tb = sys.exc_info()
        text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
        result = TransportableException(text, e_type)
        raise
    finally:
        # Serialize the result and upload it to the Files API. Even if the
        # function exits without erroring, we may not have a result.
        if result is not None:
            result_buffer = BytesIO()
            cloudpickle.dump(result, result_buffer, pickle.HIGHEST_PROTOCOL)
            result_buffer.seek(0)
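One way to express the same nesting more compactly on Python 3 is contextlib.ExitStack; a sketch reusing the snippet's names (the dance is only needed while two copies of joblib exist, since sklearn.externals.joblib was removed in later scikit-learn releases):

from contextlib import ExitStack

# _sklearn_para_backend, _joblib_para_backend and _backend come from the
# snippet above.
with ExitStack() as stack:
    for para_backend in (_sklearn_para_backend, _joblib_para_backend):
        stack.enter_context(para_backend(_backend))
    result = func()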
github PrincetonUniversity / PsyNeuLink / Scripts / Debug / predator_prey_opt / run_cost_rate_search.py
if __name__ == '__main__':

    client = Client(scheduler_file='scheduler.json')
    # client = Client()  # shorthand: creates a LocalCluster and connects to it
    print(client)

    domain = ht.Domain({
        "cost_rate": [-.9, -.1]})

    optimiser = ht.BayesianOptimisation(domain)

    # Note that this redefinition replaces the search domain created above
    # with a single fixed value.
    domain = ht.Domain({
        "cost_rate": set([-.8])
    })

    with joblib.parallel_backend('dask'):
        with joblib.Parallel() as parallel:
            print("Doing the work ... ")
            results = parallel(joblib.delayed(predator_prey)(*domain.sample().as_namedtuple())
                               for _ in range(2))

    print(results)
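As above, joblib.Parallel also works as a context manager: reusing one instance across several calls keeps the worker pool alive between them. A small sketch under the same dask backend, with a hypothetical work() task:

def work(i):
    # Placeholder task.
    return i * i

with joblib.parallel_backend('dask'):
    with joblib.Parallel() as parallel:
        first = parallel(joblib.delayed(work)(i) for i in range(4))
        second = parallel(joblib.delayed(work)(i) for i in range(4, 8))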
github joblib / joblib / examples / parallel / distributed_backend_simple.py
# Run parallel computation using dask.distributed
###############################################################################

import time

import joblib
from dask.distributed import Client

# A dask.distributed Client must exist before the 'dask' backend can be
# selected; a local cluster is enough here.
client = Client(processes=False)


def long_running_function(i):
    time.sleep(.1)
    return i


###############################################################################
# The verbose messages below show that the backend is indeed the
# dask.distributed one
with joblib.parallel_backend('dask'):
    joblib.Parallel(verbose=100)(
        joblib.delayed(long_running_function)(i)
        for i in range(10))
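The same pattern scales out unchanged: point the Client at a remote scheduler and the Parallel call runs on that cluster's workers. A sketch with a placeholder address:

client = Client('tcp://scheduler-host:8786')  # placeholder address

with joblib.parallel_backend('dask'):
    joblib.Parallel(verbose=100)(
        joblib.delayed(long_running_function)(i)
        for i in range(10))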