Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _slice_by_column(self, df):
    """Generate one data slice per group of rows sharing a column value.

    The data frame is grouped by the column named in ``self.window_size``
    (``sort=False`` is passed to ``groupby``). Each group is wrapped in a
    ``DataSliceFrame`` and given a ``DataSliceContext`` that records the
    1-based slice number and the first and last valid index of the slice.

    Args:
        df: Data frame to split; it must contain the column named by
            ``self.window_size``.

    Yields:
        A ``DataSliceFrame`` for each group, with its ``context`` attribute
        populated.
    """
    groups = df.groupby(self.window_size, sort=False)
    for number, (key, frame) in enumerate(groups, start=1):
        data_slice = DataSliceFrame(frame)
        first_index = data_slice.first_valid_index()
        last_index = data_slice.last_valid_index()
        data_slice.context = DataSliceContext(
            slice_number=number,
            slice_start=first_index,
            slice_stop=last_index,
        )
        # Store the group key on the context under the column's own name.
        setattr(data_slice.context, self.window_size, key)
        # The context of a column-based slice does not keep ``next_start``.
        del data_slice.context.next_start
        yield data_slice
stop = self._iloc(df.index, size.value)
else:
stop = start.value + size.value
ds = df[:stop]
# Pandas includes both endpoints when slicing by time.
# This results in the right endpoint overlapping in consecutive data slices.
# Resolved by making the right endpoint exclusive.
# https://pandas.pydata.org/pandas-docs/version/0.19/gotchas.html#endpoints-are-inclusive
if not ds.empty:
overlap = ds.index == stop
if overlap.any():
ds = ds[~overlap]
ds.context = DataSliceContext(slice_start=start.value, slice_stop=stop)
return ds