Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
new_Xs = {}
est_index = field_to_index[step_name]
for ids in _group_ids_by_index(est_index, tokens):
# Get the estimator for this subgroup
sub_est = params[ids[0]][est_index]
if sub_est is MISSING:
sub_est = step
# If an estimator is `None`, there's nothing to do
if sub_est is None:
nones = dict.fromkeys(ids, None)
new_fits.update(nones)
if is_transform:
if none_passthrough:
new_Xs.update(zip(ids, get(ids, Xs)))
else:
new_Xs.update(nones)
else:
# Extract the proper subset of Xs, ys
sub_Xs = get(ids, Xs)
sub_ys = get(ids, ys)
# Only subset the parameters/tokens if necessary
if sub_fields:
sub_tokens = list(pluck(sub_inds, get(ids, tokens)))
sub_params = list(pluck(sub_inds, get(ids, params)))
else:
sub_tokens = sub_params = None
if is_transform:
sub_fits, sub_Xs = do_fit_transform(
dsk,
# Rebuild the FeatureUnions
step_names = [n for n, _ in est.transformer_list]
if "transformer_weights" in field_to_index:
index = field_to_index["transformer_weights"]
weight_lk = {}
weight_tokens = list(pluck(index, tokens))
for i, tok in enumerate(weight_tokens):
if tok not in weight_lk:
weights = params[i][index]
if weights is MISSING:
weights = est.transformer_weights
lk = weights or {}
weight_list = [lk.get(n) for n in step_names]
weight_lk[tok] = (weights, weight_list)
weights = get(weight_tokens, weight_lk)
else:
lk = est.transformer_weights or {}
weight_list = [lk.get(n) for n in step_names]
weight_tokens = repeat(None)
weights = repeat((est.transformer_weights, weight_list))
out = []
out_append = out.append
fit_name = "feature-union-" + token
tr_name = "feature-union-concat-" + token
m = 0
seen = {}
for steps, Xs, wt, (w, wl), nsamp in zip(
zip(*fit_steps), zip(*tr_Xs), weight_tokens, weights, n_samples
):
if (steps, wt) in seen:
# If an estimator is `None`, there's nothing to do
if sub_est is None:
nones = dict.fromkeys(ids, None)
new_fits.update(nones)
if is_transform:
if none_passthrough:
new_Xs.update(zip(ids, get(ids, Xs)))
else:
new_Xs.update(nones)
else:
# Extract the proper subset of Xs, ys
sub_Xs = get(ids, Xs)
sub_ys = get(ids, ys)
# Only subset the parameters/tokens if necessary
if sub_fields:
sub_tokens = list(pluck(sub_inds, get(ids, tokens)))
sub_params = list(pluck(sub_inds, get(ids, params)))
else:
sub_tokens = sub_params = None
if is_transform:
sub_fits, sub_Xs = do_fit_transform(
dsk,
next_token,
sub_est,
cv,
sub_fields,
sub_tokens,
sub_params,
sub_Xs,
sub_ys,
sub_fit_params,
# Rebuild the FeatureUnions
step_names = [n for n, _ in est.transformer_list]
if "transformer_weights" in field_to_index:
index = field_to_index["transformer_weights"]
weight_lk = {}
weight_tokens = list(pluck(index, tokens))
for i, tok in enumerate(weight_tokens):
if tok not in weight_lk:
weights = params[i][index]
if weights is MISSING:
weights = est.transformer_weights
lk = weights or {}
weight_list = [lk.get(n) for n in step_names]
weight_lk[tok] = (weights, weight_list)
weights = get(weight_tokens, weight_lk)
else:
lk = est.transformer_weights or {}
weight_list = [lk.get(n) for n in step_names]
weight_tokens = repeat(None)
weights = repeat((est.transformer_weights, weight_list))
out = []
out_append = out.append
fit_name = "feature-union-" + token
tr_name = "feature-union-concat-" + token
m = 0
seen = {}
for steps, Xs, wt, (w, wl), nsamp in zip(
zip(*fit_steps), zip(*tr_Xs), weight_tokens, weights, n_samples
):
if (steps, wt) in seen:
sub_est = params[ids[0]][est_index]
if sub_est is MISSING:
sub_est = step
# If an estimator is `None`, there's nothing to do
if sub_est is None:
nones = dict.fromkeys(ids, None)
new_fits.update(nones)
if is_transform:
if none_passthrough:
new_Xs.update(zip(ids, get(ids, Xs)))
else:
new_Xs.update(nones)
else:
# Extract the proper subset of Xs, ys
sub_Xs = get(ids, Xs)
sub_ys = get(ids, ys)
# Only subset the parameters/tokens if necessary
if sub_fields:
sub_tokens = list(pluck(sub_inds, get(ids, tokens)))
sub_params = list(pluck(sub_inds, get(ids, params)))
else:
sub_tokens = sub_params = None
if is_transform:
sub_fits, sub_Xs = do_fit_transform(
dsk,
next_token,
sub_est,
cv,
sub_fields,
sub_tokens,
def assemble(pair):
    """Concatenate the pieces selected from an ``(a, b)`` pair into one result.

    ``pair`` is unpacked into two items, either of which may be ``None``
    (presumably the left and right rows of a join match -- confirm against
    the caller). The result is built from three parts, in this order:

    1. the ``on_left`` entries of the first item, or the ``on_right``
       entries of the second item when the first is ``None``;
    2. the ``left_self_columns`` entries of the first item, or a run of
       ``None`` of length ``len(t.lhs.fields) - len(on_left)``;
    3. the ``right_self_columns`` entries of the second item, or a run of
       ``None`` of length ``len(t.rhs.fields) - len(on_right)``.

    NOTE(review): ``get`` and the column selectors come from the enclosing
    scope; the parts are joined with ``+``, so ``get`` is assumed to return
    tuples here -- verify.
    """
    left_item, right_item = pair

    if left_item is None:
        # No first item: take the key part from the second item and pad
        # the left-only part with None.
        key_part = get(on_right, right_item)
        left_part = (None,) * (len(t.lhs.fields) - len(on_left))
    else:
        key_part = get(on_left, left_item)
        left_part = get(left_self_columns, left_item)

    if right_item is None:
        # No second item: pad the right-only part with None.
        right_part = (None,) * (len(t.rhs.fields) - len(on_right))
    else:
        right_part = get(right_self_columns, right_item)

    return key_part + left_part + right_part
cv,
sub_fields,
sub_tokens,
sub_params,
sub_Xs,
sub_ys,
sub_fit_params,
n_splits,
error_score,
)
new_fits.update(zip(ids, sub_fits))
# Extract lists of transformed Xs and fit steps
all_ids = list(range(len(Xs)))
if is_transform:
Xs = get(all_ids, new_Xs)
fits = get(all_ids, new_fits)
elif step is None:
# Nothing to do
fits = [None] * len(Xs)
if not none_passthrough:
Xs = fits
else:
# Only subset the parameters/tokens if necessary
if sub_fields:
sub_tokens = list(pluck(sub_inds, tokens))
sub_params = list(pluck(sub_inds, params))
else:
sub_tokens = sub_params = None
if is_transform:
fits, Xs = do_fit_transform(
dsk,
def assemble(pair):
    """Concatenate the pieces selected from an ``(a, b)`` pair into one result.

    ``pair`` is unpacked into two items, either of which may be ``None``
    (presumably the left and right rows of a join match -- confirm against
    the caller). The result is built from three parts, in this order:

    1. the ``on_left`` entries of the first item, or the ``on_right``
       entries of the second item when the first is ``None``;
    2. the ``left_self_columns`` entries of the first item, or a run of
       ``None`` of length ``len(t.lhs.fields) - len(on_left)``;
    3. the ``right_self_columns`` entries of the second item, or a run of
       ``None`` of length ``len(t.rhs.fields) - len(on_right)``.

    NOTE(review): ``get`` and the column selectors come from the enclosing
    scope; the parts are joined with ``+``, so ``get`` is assumed to return
    tuples here -- verify.
    """
    left_item, right_item = pair

    if left_item is None:
        # No first item: take the key part from the second item and pad
        # the left-only part with None.
        key_part = get(on_right, right_item)
        left_part = (None,) * (len(t.lhs.fields) - len(on_left))
    else:
        key_part = get(on_left, left_item)
        left_part = get(left_self_columns, left_item)

    if right_item is None:
        # No second item: pad the right-only part with None.
        right_part = (None,) * (len(t.rhs.fields) - len(on_right))
    else:
        right_part = get(right_self_columns, right_item)

    return key_part + left_part + right_part
sub_est,
cv,
sub_fields,
sub_tokens,
sub_params,
sub_Xs,
sub_ys,
sub_fit_params,
n_splits,
error_score,
)
new_fits.update(zip(ids, sub_fits))
# Extract lists of transformed Xs and fit steps
all_ids = list(range(len(Xs)))
if is_transform:
Xs = get(all_ids, new_Xs)
fits = get(all_ids, new_fits)
elif step is None:
# Nothing to do
fits = [None] * len(Xs)
if not none_passthrough:
Xs = fits
else:
# Only subset the parameters/tokens if necessary
if sub_fields:
sub_tokens = list(pluck(sub_inds, tokens))
sub_params = list(pluck(sub_inds, params))
else:
sub_tokens = sub_params = None
if is_transform:
fits, Xs = do_fit_transform(