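# Imports assumed by the fragments below. The originals come from separate
# pyts modules, so this header is a best-effort reconstruction; the exact
# import paths are assumptions.
import numpy as np
from numba import njit, prange
from scipy.sparse import coo_matrix, csr_matrix, hstack
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

from pyts.approximation import SymbolicFourierApproximation


# `_windowed_view` is a private pyts helper used throughout this excerpt but
# not defined in it. A minimal sketch of what it plausibly does, inferred from
# the call sites and the window-count formula used below (the real helper may
# use stride tricks instead):
def _windowed_view(X, n_samples, n_timestamps, window_size, window_step):
    """Return the sliding windows of each series.

    Output shape: (n_samples, n_windows, window_size).
    """
    n_windows = (n_timestamps - window_size + window_step) // window_step
    starts = window_step * np.arange(n_windows)
    indices = starts[:, None] + np.arange(window_size)[None, :]
    return X[:, indices]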
Returns
-------
X_new : sparse matrix, shape = (n_samples, n_words)
Document-term matrix.
"""
X = check_array(X)
n_samples, n_timestamps = X.shape
if y is not None:
check_classification_targets(y)
window_size, window_step = self._check_params(n_timestamps)
n_windows = (n_timestamps - window_size + window_step) // window_step
X_windowed = _windowed_view(
X, n_samples, n_timestamps, window_size, window_step
)
X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)
sfa = SymbolicFourierApproximation(
n_coefs=self.word_size, drop_sum=self.drop_sum, anova=self.anova,
norm_mean=self.norm_mean, norm_std=self.norm_std,
n_bins=self.n_bins, strategy=self.strategy, alphabet=self.alphabet
)
if y is None:
y_repeated = None
else:
y_repeated = np.repeat(y, n_windows)
X_sfa = sfa.fit_transform(X_windowed, y_repeated)
        X_word = np.asarray([''.join(X_sfa[i])
                             for i in range(n_samples * n_windows)])
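        # Sanity check of the window-count formula above (illustrative
        # values): n_timestamps=12, window_size=4, window_step=2 gives
        # n_windows = (12 - 4 + 2) // 2 = 5, with window starts 0, 2, 4, 6, 8.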
        Returns
        -------
self : object
"""
X, y = check_X_y(X, y)
n_samples, n_timestamps = X.shape
check_classification_targets(y)
le = LabelEncoder()
y_ind = le.fit_transform(y)
self.classes_ = le.classes_
n_classes = self.classes_.size
window_size, window_step = self._check_params(n_timestamps)
n_windows = (n_timestamps - window_size + window_step) // window_step
X_windowed = _windowed_view(
X, n_samples, n_timestamps, window_size, window_step
)
X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)
sfa = SymbolicFourierApproximation(
n_coefs=self.word_size, drop_sum=self.drop_sum, anova=self.anova,
norm_mean=self.norm_mean, norm_std=self.norm_std,
n_bins=self.n_bins, strategy=self.strategy, alphabet=self.alphabet
)
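        # Each series yields n_windows subsequences, so the labels must be
        # repeated to stay aligned with the flattened windows, e.g.
        # y = [0, 1] and n_windows = 3 give np.repeat(y, 3) = [0, 0, 0, 1, 1, 1].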
y_repeated = np.repeat(y, n_windows)
X_sfa = sfa.fit_transform(X_windowed, y_repeated)
X_word = np.asarray([''.join(X_sfa[i])
for i in range(n_samples * n_windows)])
X_word = X_word.reshape(n_samples, n_windows)
        Parameters
        ----------
        X : array-like, shape = (n_samples, n_timestamps)
            Input data.

Returns
-------
X_new : array, shape = (n_samples,)
Transformed data. Each row is a string consisting of words
separated by a whitespace.
"""
X = check_array(X, dtype=None)
n_samples, n_timestamps = X.shape
window_size, window_step = self._check_params(n_timestamps)
n_windows = (n_timestamps - window_size + window_step) // window_step
X_window = _windowed_view(X, n_samples, n_timestamps,
window_size, window_step)
X_word = np.asarray([[''.join(X_window[i, j])
for j in range(n_windows)]
for i in range(n_samples)])
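        # Numerosity reduction drops consecutive duplicate words, keeping the
        # last word of each run (the appended column of True always keeps the
        # final word of each series).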
if self.numerosity_reduction:
not_equal = np.c_[X_word[:, 1:] != X_word[:, :-1],
np.full(n_samples, True)]
X_bow = np.asarray([' '.join(X_word[i, not_equal[i]])
for i in range(n_samples)])
else:
X_bow = np.asarray([' '.join(X_word[i]) for i in range(n_samples)])
return X_bow
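    # Usage sketch for this transform. The code above joins window values
    # directly, so X is assumed to be already discretized into symbols
    # (e.g. SAX letters); values are illustrative:
    #
    #   X = np.array([list('aabbbb')])
    #   # window_size=3, window_step=1 -> words 'aab', 'abb', 'bbb', 'bbb'
    #   # numerosity reduction keeps the last of each run -> 'aab abb bbb'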
check_is_fitted(self, ['_relevant_features_list', '_sfa_list',
'_vectorizer_list', 'vocabulary_'])
X = check_array(X, dtype='float64')
n_samples, n_timestamps = X.shape
X_features = coo_matrix((n_samples, 0), dtype=np.int64)
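        # Start from an empty sparse block; each fitted window size appends
        # its word counts, restricted to the features kept at fit time.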
for (window_size, window_step, sfa,
vectorizer, relevant_features) in zip(
self._window_sizes, self._window_steps, self._sfa_list,
self._vectorizer_list, self._relevant_features_list):
n_windows = ((n_timestamps - window_size + window_step)
// window_step)
X_windowed = _windowed_view(
X, n_samples, n_timestamps, window_size, window_step
)
X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)
X_sfa = sfa.transform(X_windowed)
X_word = np.asarray([''.join(X_sfa[i])
for i in range(n_samples * n_windows)])
X_word = X_word.reshape(n_samples, n_windows)
X_bow = np.asarray([' '.join(X_word[i]) for i in range(n_samples)])
X_counts = vectorizer.transform(X_bow)[:, relevant_features]
X_features = hstack([X_features, X_counts])
        if not self.sparse:
            return X_features.toarray()
return csr_matrix(X_features)
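    # Usage sketch (assuming the pyts WEASEL transformer that these fit and
    # transform fragments match; parameter values are illustrative):
    #
    #   from pyts.transformation import WEASEL
    #   weasel = WEASEL(word_size=4, window_sizes=[12, 24], sparse=True)
    #   X_train_new = weasel.fit_transform(X_train, y_train)
    #   X_test_new = weasel.transform(X_test)  # same features as at fit time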
        y : None or array-like, shape = (n_samples,)
            Class labels for each data sample.
Returns
-------
self : object
"""
X = check_array(X)
n_samples, n_timestamps = X.shape
if y is not None:
check_classification_targets(y)
window_size, window_step = self._check_params(n_timestamps)
n_windows = (n_timestamps - window_size + window_step) // window_step
X_windowed = _windowed_view(
X, n_samples, n_timestamps, window_size, window_step
)
X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)
sfa = SymbolicFourierApproximation(
n_coefs=self.word_size, drop_sum=self.drop_sum, anova=self.anova,
norm_mean=self.norm_mean, norm_std=self.norm_std,
n_bins=self.n_bins, strategy=self.strategy, alphabet=self.alphabet
)
if y is None:
y_repeated = None
else:
y_repeated = np.repeat(y, n_windows)
X_sfa = sfa.fit_transform(X_windowed, y_repeated)
        X_word = np.asarray([''.join(X_sfa[i])
                             for i in range(n_samples * n_windows)])
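    # End-to-end usage sketch (assuming the pyts BOSS transformer that this
    # fit fragment matches; parameter values are illustrative):
    #
    #   from pyts.datasets import load_gunpoint
    #   from pyts.transformation import BOSS
    #
    #   X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)
    #   boss = BOSS(word_size=4, n_bins=4, window_size=12, sparse=True)
    #   X_boss = boss.fit_transform(X_train, y_train)  # document-term matrix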
@njit()  # assumption: numba-compiled in the original source (it uses prange)
def _derive_all_squared_distances_fit(
X, n_samples, n_timestamps, window_sizes, shapelets, lengths
):
"""Derive the squared distances between all shapelets and time series."""
distances = [] # save the distances in a list
for i in prange(len(lengths)):
window_size = lengths[i][0]
X_window = _windowed_view(X, n_samples, n_timestamps,
window_size, window_step=1)
for j in prange(shapelets[i].shape[0]):
dist = _derive_shapelet_distances(X_window, shapelets[i][j])
distances.append(dist)
return distances
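
# `_derive_shapelet_distances` is called above but not shown in this excerpt.
# A plausible minimal implementation, assuming the distance is the minimum
# over all windows of the mean squared point-wise difference, as is standard
# for learning-shapelet models (the real pyts helper may differ). Written
# with explicit loops so it stays numba-compatible:
@njit()
def _derive_shapelet_distances(X_window, shapelet):
    """Return, for each sample, its squared distance to one shapelet.

    X_window has shape (n_samples, n_windows, window_size) and shapelet
    has shape (window_size,).
    """
    n_samples, n_windows, window_size = X_window.shape
    distances = np.empty(n_samples)
    for i in range(n_samples):
        best = np.inf
        for j in range(n_windows):
            acc = 0.
            for k in range(window_size):
                diff = X_window[i, j, k] - shapelet[k]
                acc += diff * diff
            acc /= window_size
            if acc < best:
                best = acc
        distances[i] = best
    return distances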
# Shapelet initialization
window_sizes = np.arange(
min_shapelet_length,
min_shapelet_length * (self.shapelet_scale + 1),
min_shapelet_length
)
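        # e.g. min_shapelet_length=10 and shapelet_scale=3 give
        # window_sizes = np.arange(10, 40, 10) = [10, 20, 30].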
n_shapelets_per_cluster = n_timestamps - window_sizes + 1
if np.any(n_shapelets_per_size > n_shapelets_per_cluster):
raise ValueError("'n_shapelets_per_size' is too high given "
"'min_shapelet_length' and 'shapelet_scale'.")
shapelets = []
lengths = []
for window_size in window_sizes:
X_window = _windowed_view(
X, n_samples, n_timestamps, window_size, window_step=1)
X_window = X_window.reshape(-1, window_size)
kmeans = KMeans(n_clusters=n_shapelets_per_size, random_state=rng)
kmeans.fit(X_window)
shapelets.append(kmeans.cluster_centers_)
lengths.append(np.full(n_shapelets_per_size, window_size))
shapelets = tuple(shapelets)
lengths = tuple(lengths)
# Weight initialization
n_shapelets = n_shapelets_per_size * self.shapelet_scale
if n_classes == 2:
if self.fit_intercept:
weights = rng.randn(n_shapelets + 1) / 100
else:
weights = rng.randn(n_shapelets) / 100
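        # e.g. n_shapelets_per_size=5 and shapelet_scale=3 give
        # n_shapelets = 15. The 1/100 scaling keeps the initial weights, and
        # hence the initial decision scores, close to zero; the multiclass
        # branch is not part of this excerpt but presumably initializes one
        # weight vector per class.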
def _derive_all_squared_distances_transform(
X, n_samples, n_timestamps, window_sizes, shapelets, lengths
):
"""Derive the squared distances between all shapelets and time series."""
distances = [] # save the distances in a list
permutation = [] # save the permutation of the indices
    for window_size in window_sizes:
X_window = _windowed_view(
X, n_samples, n_timestamps, window_size, window_step=1
)
indices = np.where(lengths == window_size)[0]
permutation.append(indices)
for idx in indices:
dist = _derive_shapelet_distances(X_window, shapelets[idx])
distances.append(dist)
return distances, permutation
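
# How a caller might use the (distances, permutation) pair to restore the
# original shapelet ordering (a sketch; the reassembly code is not part of
# this excerpt):
#
#   distances, permutation = _derive_all_squared_distances_transform(
#       X, n_samples, n_timestamps, window_sizes, shapelets, lengths
#   )
#   order = np.argsort(np.concatenate(permutation))
#   X_dist = np.asarray(distances)[order].T  # shape (n_samples, n_shapelets)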