# Build a Reddit comment stream per subreddit and register each one.
all_arr = [self.reddit().subreddit(subr) for subr in subrs]
for ctr, subr in enumerate(all_arr):
    self.sel.register(AugComStream(subr, ctr, skip_existing=True))
n_threads = len(all_arr)
self.log("\nDeploying " + str(n_threads) + " threads")

def deploy_stream(self, subr):
    # Fan comment handling out over a thread pool, one task per incoming comment.
    with parallel_backend('threading', n_jobs=32):
        Parallel()(delayed(self.do_work)(comment)
                   for comment in subr.stream.comments(skip_existing=True))

# One worker thread per subreddit stream.
with parallel_backend('threading', n_jobs=n_threads):
    Parallel()(delayed(deploy_stream)(self, subr) for subr in tqdm.tqdm(all_arr))
self.log("\nMAIN THREAD DONE!!!\n\n============================================================\n")
logging.warning('If you are behind a firewall whose settings you cannot modify, '
                'set the parameter "processes=False" on the Client object defined above; '
                'this switches the parallelization mode from process-based to thread-based.\n')
time.sleep(1)
except Exception as ex:
print(ex)
else:
# Consider the thread-based configuration below if you are running the code on an older machine.
# Initializing a thread-based local client for distributed computations:
# client = Client(processes=False, threads_per_worker=5,
#                 n_workers=1, memory_limit='3GB')  # i.e. 1 process with 5 threads.
with parallel_backend('dask'):
# Extract the training points and their labels from a CSV file.
xys, classes = extract_training_samples(sampleCoordinates)
# Convert world coordinates to the image coordinate system.
cols, rows = calculate_image_offsets(images[1], xys)
# Stack the bands on top of each other like cake layers.
data = stack_bands(images)
# Convert the offsets from float to int so they can be used as slice indices.
cols = list(map(int, cols))
rows = list(map(int, rows))
# Sample the satellite data at the coordinates extracted from the CSV.
sample = data[rows, cols, :]
x_train, x_test, y_train, y_test = train_test_split(sample, classes, test_size=0.2, random_state=63)
# Initialize a random forest instance by wrapping it in a meta-estimator
model = ParallelPostFit(estimator=RandomForestClassifier(n_estimators=trees))
# initialize a decision tree classifier.
#model = tree.DecisionTreeClassifier(max_depth=5)
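# The snippet stops after building the wrapped estimator; a hedged sketch of
# the fit-and-score step that would typically follow, still inside the
# parallel_backend('dask') block above (the accuracy_score check is an
# illustration, not part of the original code).
from sklearn.metrics import accuracy_score

model.fit(x_train, y_train)          # fit delegates to the wrapped RandomForestClassifier
y_predicted = model.predict(x_test)  # ParallelPostFit parallelizes predict for dask inputs
print("held-out accuracy:", accuracy_score(y_test, y_predicted))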
y_predicted = list(map(lambda x: 1 if x == -1 else 0, y_predicted))
elif space['model'] == "IsolationForest":
model = IsolationForest(**params)
with parallel_backend('threading'):
y_predicted = model.fit_predict(X)
y_predicted = list(map(lambda x: 1 if x == -1 else 0, y_predicted))
elif space['model'] == "OneClassSVM":
model = OneClassSVM(**params)
y_predicted = model.fit_predict(X)
y_predicted = list(map(lambda x: 1 if x == -1 else 0, y_predicted))
elif space['model'] == "LocalOutlierFactor":
model = LocalOutlierFactor(**params)
with parallel_backend('threading'):
y_predicted = model.fit_predict(X)
y_predicted = list(map(lambda x: 1 if x == -1 else 0, y_predicted))
elif space['model'] == "zscore":
model = ZScore(threshold=params['threshold'])
y_predicted = model.fit_predict(X)
except Exception as e:
print("Error:", e)
y_predicted = [0] * X.shape[0]
error['detect_' + str(space)] = e
if isinstance(y_predicted, list):
y_predicted = np.array(y_predicted)
time_taken = time.time() - start_time
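# Each branch above repeats the same conversion: scikit-learn's outlier
# detectors return -1 for outliers and 1 for inliers, which the code maps to
# 1/0 anomaly flags. A small vectorized helper (a sketch, assuming numpy is
# available) could replace the repeated lambda.
import numpy as np

def to_anomaly_flags(raw_labels):
    # Map scikit-learn's -1 (outlier) / 1 (inlier) labels to 1 / 0 flags.
    return (np.asarray(raw_labels) == -1).astype(int)

# e.g. y_predicted = to_anomaly_flags(model.fit_predict(X))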
# If you are on a UNIX system, it is possible to fall back to the old
# ``multiprocessing`` backend, which can pickle interactively defined functions
# with the default pickle module and is faster for such large objects.
#
if sys.platform != 'win32':
if IS_RUN_WITH_SPHINX_GALLERY:
# When this example is run with sphinx-gallery, it breaks the pickling
# capability of the multiprocessing backend, so we have to change the way we
# define our functions. This has nothing to do with the example itself.
from utils import func_async
else:
def func_async(i, *args):
return 2 * i
with parallel_backend('multiprocessing'):
t_start = time.time()
Parallel(n_jobs=2)(
delayed(func_async)(21, large_list) for _ in range(1))
print("With multiprocessing backend and pickle serialization: {:.3f}s"
.format(time.time() - t_start))
###############################################################################
# However, using ``fork`` to start new processes can violate the POSIX
# specification and can interact badly with compiled extensions that use
# ``openmp``. Also, it is not possible to start processes with ``fork`` on
# Windows, where only ``spawn`` is available. The ``loky`` backend has been
# developed to mitigate these issues.
#
# To have fast pickling with ``loky``, it is possible to rely on ``pickle`` to
# serialize all communications between the main process and the workers.
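#
# A minimal sketch of the same timed call under the ``loky`` backend, reusing
# ``func_async`` and ``large_list`` from the multiprocessing block above:

with parallel_backend('loky'):
    t_start = time.time()
    Parallel(n_jobs=2)(
        delayed(func_async)(21, large_list) for _ in range(1))
    print("With loky backend: {:.3f}s"
          .format(time.time() - t_start))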
dask_scheduler = "34.228.190.163:8786" # example scheduler address
res_name = ''.join([random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(15)]) + ".py2.bin"
def run():
r = parallel_evaluate(
solvers=[spearmint_minimize, ],
task_subset=Sphere_7_ri, # automatically selects all tasks
n_reps=1,
eval_kwargs={'n_calls': 6},
joblib_kwargs={'n_jobs': 1, 'verbose': 1000})
# it is a good idea to cache results
pc.dump(r, open(res_name, 'wb'))
if run_with_dask:
with parallel_backend('dask.distributed', scheduler_host=dask_scheduler):
run()
else:
run()
# load cached results
"""
r = pc.load(open(res_name, 'rb'))
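# The 'dask.distributed' backend name with scheduler_host comes from the older
# distributed.joblib integration; with current dask and joblib releases the
# usual pattern is to connect a Client first and then select the plain 'dask'
# backend. A hedged sketch, reusing dask_scheduler from above:
from dask.distributed import Client
from joblib import parallel_backend

client = Client(dask_scheduler)  # connect to the running scheduler
with parallel_backend('dask'):
    run()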
# graceful nested context managers are ~hard across python versions,
# this just works...
if NO_SKLEARN:
with _joblib_para_backend(_backend):
result = func()
else:
# we are using the nested context managers to set the joblib
# backend to the requested one in both copies of joblib: the
# package and the copy shipped by sklearn at
# `sklearn.externals.joblib`. joblib maintains the current
# backend as global state in the package and thus there are
# two backends to set when you have two copies of the package
# in play.
with _sklearn_para_backend(_backend):
with _joblib_para_backend(_backend):
result = func()
except Exception:
print("Error! Attempting to record exception.")
# Wrap the exception in joblib's TransportableException
# so that joblib can properly display the results.
e_type, e_value, e_tb = sys.exc_info()
text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
result = TransportableException(text, e_type)
raise
finally:
# Serialize the result and upload it to the Files API.
if result is not None:
# result can be None if the function failed before producing one.
result_buffer = BytesIO()
cloudpickle.dump(result, result_buffer, pickle.HIGHEST_PROTOCOL)
result_buffer.seek(0)
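# Reading the serialized result back is symmetric; a small sketch, assuming the
# raw bytes of the uploaded file are available.
import cloudpickle
from io import BytesIO

def load_result(raw_bytes):
    # Inverse of the cloudpickle.dump call above.
    return cloudpickle.load(BytesIO(raw_bytes))

# e.g. restored = load_result(result_buffer.getvalue())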
if __name__ == '__main__':
client = Client(scheduler_file='scheduler.json')
#client = Client()  # equivalent to creating a LocalCluster() and passing it to Client
print(client)
domain = ht.Domain({
"cost_rate": [-.9, -.1]})
optimiser = ht.BayesianOptimisation(domain)
domain = ht.Domain({
"cost_rate": set([-.8])
})
with joblib.parallel_backend('dask'):
with joblib.Parallel() as parallel:
print("Doing the work ... ")
results = parallel(joblib.delayed(predator_prey)(*domain.sample().as_namedtuple()) for s in range(2))
print(results)
# Run parallel computation using dask.distributed
###############################################################################
import time
import joblib
def long_running_function(i):
time.sleep(.1)
return i
###############################################################################
# The verbose messages below show that the backend is indeed the
# dask.distributed one
with joblib.parallel_backend('dask'):
joblib.Parallel(verbose=100)(
joblib.delayed(long_running_function)(i)
for i in range(10))
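###############################################################################
# The ``dask`` backend needs a ``dask.distributed`` Client in the process (as
# done with ``scheduler_file='scheduler.json'`` further up). A minimal local
# setup for running the example above could be:
from dask.distributed import Client

client = Client(n_workers=2, threads_per_worker=2)  # starts a local cluster
with joblib.parallel_backend('dask'):
    joblib.Parallel(verbose=100)(
        joblib.delayed(long_running_function)(i)
        for i in range(10))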