Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Segment one typical period (the one selected by ``i``): group its adjacent
# time steps into ``noSegments`` segments and describe every segment by a
# single row of representative values.

# make numpy array with rows containing the segmentation candidates (time steps)
# and columns as dimensions (the columns of normalizedTypicalPeriods)
segmentationCandidates = np.asarray(normalizedTypicalPeriods.loc[i, :])

# produce adjacency matrix: each time step is only connected to its preceding
# and succeeding one (ones on the first upper and lower off-diagonals)
adjacencyMatrix = np.eye(timeStepsPerPeriod, k=1) + np.eye(timeStepsPerPeriod, k=-1)

# execute clustering of adjacent time steps
if noSegments == 1:
    # a single segment covers the whole period: every time step gets label 0
    clusterOrder = np.asarray([0] * len(segmentationCandidates))
else:
    # the adjacency matrix is passed as connectivity constraint, so only
    # neighbouring time steps can be merged into the same segment
    clustering = AgglomerativeClustering(
        n_clusters=noSegments,
        linkage='ward',
        connectivity=adjacencyMatrix,
    )
    clusterOrder = clustering.fit_predict(segmentationCandidates)

# determine the indices where the segments change and the number of time steps
# in each segment; np.unique returns all three arrays ordered by cluster label,
# not by position in time
segNo, indices, segmentNoOccur = np.unique(
    clusterOrder, return_index=True, return_counts=True)

# first time step of every segment in chronological order (computed once and
# reused for all index constructions below)
segmentStartSteps = np.sort(indices)

# cluster labels in the order in which the segments occur within the period
clusterOrderUnique = [clusterOrder[index] for index in segmentStartSteps]

# determine the segments' values
# NOTE(review): the reindex calls below look rows up by cluster label, which
# assumes meanRepresentation returns one row per cluster, ordered by label.
clusterCenters = meanRepresentation(segmentationCandidates, clusterOrder)

# predict each time step of the period by representing it with the
# corresponding segment's values
predictedSegmentedNormalizedTypicalPeriods = pd.DataFrame(
    clusterCenters,
    columns=normalizedTypicalPeriods.columns,
).reindex(clusterOrder).reset_index(drop=True)

# represent the period by the segments in the right order only instead of each
# time step
segmentedNormalizedTypicalPeriods = pd.DataFrame(
    clusterCenters,
    columns=normalizedTypicalPeriods.columns,
).reindex(clusterOrderUnique).set_index(segmentStartSteps)

# keep additional information on the lengths of the segments in the right order
segmentDuration = pd.DataFrame(
    segmentNoOccur,
    columns=['Segment Duration'],
).reindex(clusterOrderUnique).set_index(segmentStartSteps)

# create DataFrame with reduced number of segments together with three indices
# per period:
# 1. The segment number
# 2. The segment duration
# 3. The index of the original time step, at which the segment starts
result = segmentedNormalizedTypicalPeriods.set_index([
    pd.Index(segNo, name='Segment Step'),
    segmentDuration['Segment Duration'],
    pd.Index(segmentStartSteps, name='Original Start Step'),
])
# append predicted and segmented DataFrame to list to create a big DataFrame for all periods