# Dispatch a pipeline step that may be None: record None fits, and either
# pass X through unchanged or propagate None, depending on the step's
# passthrough semantics.
if sub_est is None:
    nones = dict.fromkeys(ids, None)
    new_fits.update(nones)
    if is_transform:
        if none_passthrough:
            new_Xs.update(zip(ids, get(ids, Xs)))
        else:
            new_Xs.update(nones)
else:
    # Extract the proper subset of Xs, ys
    sub_Xs = get(ids, Xs)
    sub_ys = get(ids, ys)
    # Only subset the parameters/tokens if necessary
    if sub_fields:
        sub_tokens = list(pluck(sub_inds, get(ids, tokens)))
        sub_params = list(pluck(sub_inds, get(ids, params)))
    else:
        sub_tokens = sub_params = None
    if is_transform:
        sub_fits, sub_Xs = do_fit_transform(
            dsk,
            next_token,
            sub_est,
            cv,
            sub_fields,
            sub_tokens,
            sub_params,
            sub_Xs,
            sub_ys,
            sub_fit_params,
            n_splits,
            fit_params_lk,
            field_to_index,
            step_name,
            False,
            True,
        )
fit_steps.append(fits)
tr_Xs.append(out_Xs)
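
# --- Illustrative sketch (not from the original source): how the None-step
# bookkeeping above behaves with toy inputs. `ids`, `Xs`, and the
# `none_passthrough` flag are hypothetical stand-ins; `get` is toolz.get.
from toolz import get

ids = [0, 2]                      # candidate indices whose step is None
Xs = ["X0", "X1", "X2"]           # per-candidate inputs (toy values)
new_fits, new_Xs = {}, {}
nones = dict.fromkeys(ids, None)
new_fits.update(nones)            # a None step "fits" to None
none_passthrough = True           # None acts as passthrough when transforming
if none_passthrough:
    new_Xs.update(zip(ids, get(ids, Xs)))   # forward X unchanged
else:
    new_Xs.update(nones)                    # otherwise X becomes None too
assert new_Xs == {0: "X0", 2: "X2"}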
# Rebuild the FeatureUnions
step_names = [n for n, _ in est.transformer_list]
if "transformer_weights" in field_to_index:
    index = field_to_index["transformer_weights"]
    # Memoize the (weights, weight_list) pair per unique token so candidates
    # that share a transformer_weights setting are only processed once.
    weight_lk = {}
    weight_tokens = list(pluck(index, tokens))
    for i, tok in enumerate(weight_tokens):
        if tok not in weight_lk:
            weights = params[i][index]
            if weights is MISSING:
                weights = est.transformer_weights
            lk = weights or {}
            weight_list = [lk.get(n) for n in step_names]
            weight_lk[tok] = (weights, weight_list)
    weights = get(weight_tokens, weight_lk)
else:
    # No per-candidate weights: every candidate shares the estimator default
    lk = est.transformer_weights or {}
    weight_list = [lk.get(n) for n in step_names]
    weight_tokens = repeat(None)
    weights = repeat((est.transformer_weights, weight_list))
out = []
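
# --- Illustrative sketch (hypothetical values): flattening a candidate's
# transformer_weights dict into a per-step weight list ordered by
# step_names, as the branches above do for each unique token.
step_names = ["pca", "select"]
weights = {"pca": 2.0}            # e.g. one candidate's transformer_weights
weight_list = [(weights or {}).get(n) for n in step_names]
assert weight_list == [2.0, None]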
from toolz import concat, pluck


def _into_iter_mongodb(l, coll, columns=None, schema=None):
    """Into helper function.

    Return both a lazy sequence of tuples and a list of column names.
    """
    seq = coll.find()
    if not columns and schema:
        columns = schema[0].names
    elif not columns:
        # Infer columns from the first document, then stitch it back onto
        # the cursor so no records are lost.
        item = next(seq)
        seq = concat([[item], seq])
        columns = sorted(item.keys())
        columns.remove('_id')
    return columns, pluck(columns, seq)
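
# --- Hypothetical usage sketch: assumes a reachable MongoDB server and
# pymongo installed; the "mydb"/"events" names are made up.
# from pymongo import MongoClient
# coll = MongoClient()["mydb"]["events"]
# columns, rows = _into_iter_mongodb(None, coll)
# `rows` is a lazy toolz.pluck over the cursor, yielding tuples in column
# order, with '_id' excluded when columns are inferred from the first
# document.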
def normalize_params(params):
    """Take a list of dictionaries, and tokenize/normalize."""
    # Collect a set of all fields
    fields = set()
    for p in params:
        fields.update(p)
    fields = sorted(fields)
    params2 = list(pluck(fields, params, MISSING))
    # Non-basic types (including MISSING) are unique to their id
    tokens = [
        tuple(x if isinstance(x, (int, float, str)) else id(x) for x in p)
        for p in params2
    ]
    return fields, tokens, params2
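
# --- Worked example (illustrative): two parameter dicts are padded to a
# common sorted field list; non-basic values (and the module-level MISSING
# sentinel used above) tokenize by id().
params = [{"a": 1}, {"a": 1, "b": [1, 2]}]
fields, tokens, params2 = normalize_params(params)
assert fields == ["a", "b"]
assert params2 == [(1, MISSING), (1, [1, 2])]
# tokens[0] == (1, id(MISSING)); tokens[1] == (1, id(params[1]["b"]))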
keys = [cv_results]
if refit:
    if multimetric:
        scorer = refit
    else:
        scorer = "score"
    best_params = "best-params-" + main_token
    dsk[best_params] = (get_best_params, candidate_params_name, cv_results, scorer)
    best_estimator = "best-estimator-" + main_token
    if fit_params:
        # Rebuild fit_params as a dask task tuple so the dict of fit
        # parameters is materialized at graph-execution time.
        fit_params = (
            dict,
            (zip, list(fit_params.keys()), list(pluck(1, fit_params.values()))),
        )
    dsk[best_estimator] = (
        fit_best,
        clone(estimator),
        best_params,
        X_name,
        y_name,
        fit_params,
    )
    keys.append(best_estimator)
return dsk, keys, n_splits
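
# --- Minimal sketch of the dask graph convention used above: each value in
# `dsk` is a task tuple of (callable, *args), stored under a string key and
# evaluated on demand; the toy graph here is illustrative only.
from operator import add
from dask.core import get as dask_get

dsk = {"x": 1, "y": (add, "x", 10)}
assert dask_get(dsk, "y") == 11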