Skip to content

Commit

Permalink
🕵️ test error when multiple windows in case of custom segments
Browse files Browse the repository at this point in the history
  • Loading branch information
jvdd committed Oct 22, 2023
1 parent 3fd7d10 commit f3a9496
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 5 deletions.
49 changes: 49 additions & 0 deletions tests/test_features_feature_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,20 @@ def test_group_by_consecutive_subcall():
assert_frame_equal(res, expected_df)


@pytest.mark.parametrize("group_by", ["group_by_all", "group_by_consecutive"])
def test_groupby_multiple_window_sizes_error(dummy_group_data, group_by):
    """Group-by calculation must reject a collection with multiple windows.

    The same series / function combination is registered with two windows
    ("5D" and "10D"); calculating with either group-by flavour must fail.
    """
    fc = FeatureCollection(
        MultipleFeatureDescriptors(
            functions=[np.sum, np.min],
            series_names=["number_sold"],
            windows=["5D", "10D"],
        )
    )

    # The multiple-windows check is implemented as an `assert`; pin both the
    # exception type and its message so an unrelated failure (e.g. a typo in
    # this test) cannot make the test pass by accident.
    with pytest.raises(AssertionError, match="can only have 1 window"):
        fc.calculate(dummy_group_data, return_df=True, **{group_by: "store"})


def test_single_series_feature_collection(dummy_data):
fd = FeatureDescriptor(
function=np.sum,
Expand Down Expand Up @@ -961,6 +975,41 @@ def test_time_segment_start_and_end_idxs_empty_array():
assert np.all(res["dummy__len__w=manual"] == [])


def test_sequence_segment_start_and_end_idxs_no_multiple_windows():
    """Multiple windows are only rejected when both segment index arrays are given.

    Uses a sequence-indexed (integer range index) series. Strided calculation
    and calculation with only start or only end indices must succeed for a
    collection that holds two windows; passing both arrays must fail.
    """
    s = pd.Series(np.arange(20), name="dummy")
    segment_start_idxs = [0, 5, 3, 3]
    segment_end_idxs = [5, 10, 8, 5]

    fc = FeatureCollection(MultipleFeatureDescriptors([np.min], "dummy", [3, 5]))

    # None of these calls may raise: the windows are still needed to derive
    # the missing side of each segment.
    fc.calculate(s, stride=5)
    fc.calculate(s, segment_start_idxs=segment_start_idxs)
    fc.calculate(s, segment_end_idxs=segment_end_idxs)

    # Should only fail when both are provided. The check is an `assert`, so
    # expect its AssertionError and message rather than any exception.
    with pytest.raises(AssertionError, match="can only have 1 window"):
        fc.calculate(
            s, segment_start_idxs=segment_start_idxs, segment_end_idxs=segment_end_idxs
        )


def test_time_segment_start_and_end_idxs_no_multiple_windows():
    """Multiple windows are only rejected when both segment index arrays are given.

    Time-indexed counterpart of the sequence-index test: the series carries an
    hourly DatetimeIndex and the windows / stride are time strings.
    """
    s = pd.Series(np.arange(20), name="dummy")
    s.index = pd.date_range("2021-08-09", freq="1h", periods=20)
    segment_start_idxs = s.index[[0, 5, 3, 3]]
    segment_end_idxs = s.index[[5, 10, 8, 5]]

    fc = FeatureCollection(MultipleFeatureDescriptors([np.min], "dummy", ["3h", "5h"]))

    # None of these calls may raise: the windows are still needed to derive
    # the missing side of each segment.
    fc.calculate(s, stride="5h")
    fc.calculate(s, segment_start_idxs=segment_start_idxs)
    fc.calculate(s, segment_end_idxs=segment_end_idxs)

    # Should only fail when both are provided. The check is an `assert`, so
    # expect its AssertionError and message rather than any exception.
    with pytest.raises(AssertionError, match="can only have 1 window"):
        fc.calculate(
            s, segment_start_idxs=segment_start_idxs, segment_end_idxs=segment_end_idxs
        )


def test_sequence_segment_start_or_end_idxs_of_wrong_dtype():
s = pd.Series(np.arange(20), name="dummy")
wrong_segment_idx = pd.date_range("2021-08-09", freq="1h", periods=20)[5:9]
Expand Down
27 changes: 22 additions & 5 deletions tsflex/features/feature_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,13 +418,22 @@ def get_stroll_function(idx) -> Tuple[StridedRolling, FuncWrapper]:

return get_stroll_function

def _check_no_multiple_windows(self):
def _check_no_multiple_windows(self, error_case: str):
"""Check whether there are no multiple windows in the feature collection.
Parameters
----------
error_case : str
The case in which no multiple windows are allowed.
"""
assert (
self._get_nb_output_features_without_window()
== self.get_nb_output_features()
), (
"When using `segment_XXX_idxs`; each output name - series_input combination"
+ " can only have 1 window (or None)"
error_case
+ "; each output name - series_input combination can only have 1 window"
+ " (or None)"
)

def _data_to_series_dict(
Expand Down Expand Up @@ -1039,6 +1048,10 @@ def calculate(
or group_by_consecutive
or isinstance(data, pd.core.groupby.DataFrameGroupBy)
):
self._check_no_multiple_windows(
error_case="When using the groupby behavior"
)

# The grouping column must be part of the required series
if group_by_all:
# group_by_consecutive should be None (checked by asserts above)
Expand Down Expand Up @@ -1117,7 +1130,9 @@ def calculate(
_check_start_end_array(segment_start_idxs, segment_end_idxs)
# Check if there is either 1 or No(ne) window value for every output name -
# input_series combination
self._check_no_multiple_windows()
self._check_no_multiple_windows(
error_case="When using both `segment_start_idxs` and `segment_end_idxs`"
)

if segment_start_idxs is None or segment_end_idxs is None:
assert all(
Expand Down Expand Up @@ -1236,7 +1251,9 @@ def reduce(self, feat_cols_to_keep: List[str]) -> FeatureCollection:
assert all(c.endswith("w=manual") for c in feat_cols_to_keep)
# As the windows are created manual, the FeatureCollection cannot contain
# multiple windows for the same output name - input_series combination
self._check_no_multiple_windows()
self._check_no_multiple_windows(
error_case="When reducing a FeatureCollection with manual windows"
)
manual_window = True
feat_col_fd_mapping: Dict[str, Tuple[str, FeatureDescriptor]] = {}
for (s_names, window), fd_list in self._feature_desc_dict.items():
Expand Down

0 comments on commit f3a9496

Please sign in to comment.