🕵️ test error when multiple windows in case of custom segments

predict-idlab · Oct 22, 2023 · f3a9496 · f3a9496
1 parent 3fd7d10
commit f3a9496
Show file tree

Hide file tree

Showing 2 changed files with 71 additions and 5 deletions.
diff --git a/tests/test_features_feature_collection.py b/tests/test_features_feature_collection.py
@@ -475,6 +475,20 @@ def test_group_by_consecutive_subcall():
     assert_frame_equal(res, expected_df)
 
 
+@pytest.mark.parametrize("group_by", ["group_by_all", "group_by_consecutive"])
+def test_groupby_multiple_window_sizes_error(dummy_group_data, group_by):
+    fc = FeatureCollection(
+        MultipleFeatureDescriptors(
+            functions=[np.sum, np.min],
+            series_names=["number_sold"],
+            windows=["5D", "10D"],  # two windows per output -> groupby must reject
+        )
+    )
+
+    with pytest.raises(AssertionError, match="can only have 1 window"):
+        fc.calculate(dummy_group_data, return_df=True, **{group_by: "store"})
+
+
 def test_single_series_feature_collection(dummy_data):
     fd = FeatureDescriptor(
         function=np.sum,
@@ -961,6 +975,41 @@ def test_time_segment_start_and_end_idxs_empty_array():
     assert np.all(res["dummy__len__w=manual"] == [])
 
 
+def test_sequence_segment_start_and_end_idxs_no_multiple_windows():
+    s = pd.Series(np.arange(20), name="dummy")
+    segment_start_idxs = [0, 5, 3, 3]
+    segment_end_idxs = [5, 10, 8, 5]
+
+    fc = FeatureCollection(MultipleFeatureDescriptors([np.min], "dummy", [3, 5]))
+    _ = fc.calculate(s, stride=5)
+    _ = fc.calculate(s, segment_start_idxs=segment_start_idxs)
+    _ = fc.calculate(s, segment_end_idxs=segment_end_idxs)
+
+    with pytest.raises(AssertionError, match="can only have 1 window"):
+        # Multiple windows are only rejected when both start and end idxs are given
+        _ = fc.calculate(
+            s, segment_start_idxs=segment_start_idxs, segment_end_idxs=segment_end_idxs
+        )
+
+
+def test_time_segment_start_and_end_idxs_no_multiple_windows():
+    s = pd.Series(np.arange(20), name="dummy")
+    s.index = pd.date_range("2021-08-09", freq="1h", periods=20)
+    segment_start_idxs = s.index[[0, 5, 3, 3]]
+    segment_end_idxs = s.index[[5, 10, 8, 5]]
+
+    fc = FeatureCollection(MultipleFeatureDescriptors([np.min], "dummy", ["3h", "5h"]))
+    _ = fc.calculate(s, stride="5h")
+    _ = fc.calculate(s, segment_start_idxs=segment_start_idxs)
+    _ = fc.calculate(s, segment_end_idxs=segment_end_idxs)
+
+    with pytest.raises(AssertionError, match="can only have 1 window"):
+        # Multiple windows are only rejected when both start and end idxs are given
+        _ = fc.calculate(
+            s, segment_start_idxs=segment_start_idxs, segment_end_idxs=segment_end_idxs
+        )
+
+
 def test_sequence_segment_start_or_end_idxs_of_wrong_dtype():
     s = pd.Series(np.arange(20), name="dummy")
     wrong_segment_idx = pd.date_range("2021-08-09", freq="1h", periods=20)[5:9]

diff --git a/tsflex/features/feature_collection.py b/tsflex/features/feature_collection.py
@@ -418,13 +418,22 @@ def get_stroll_function(idx) -> Tuple[StridedRolling, FuncWrapper]:
 
         return get_stroll_function
 
-    def _check_no_multiple_windows(self):
+    def _check_no_multiple_windows(self, error_case: str):
+        """Assert that every output name - series_input pair has at most 1 window.
+
+        Parameters
+        ----------
+        error_case : str
+            Prefix of the assertion message; names the case forbidding >1 window.
+
+        """
         assert (
             self._get_nb_output_features_without_window()
             == self.get_nb_output_features()
         ), (
-            "When using `segment_XXX_idxs`; each output name - series_input combination"
-            + " can only have 1 window (or None)"
+            error_case
+            + "; each output name - series_input combination can only have 1 window"
+            + " (or None)"
         )
 
     def _data_to_series_dict(
@@ -1039,6 +1048,10 @@ def calculate(
             or group_by_consecutive
             or isinstance(data, pd.core.groupby.DataFrameGroupBy)
         ):
+            self._check_no_multiple_windows(
+                error_case="When using the groupby behavior"
+            )
+
             # The grouping column must be part of the required series
             if group_by_all:
                 # group_by_consecutive should be None (checked by asserts above)
@@ -1117,7 +1130,9 @@ def calculate(
             _check_start_end_array(segment_start_idxs, segment_end_idxs)
             # Check if there is either 1 or No(ne) window value for every output name -
             # input_series combination
-            self._check_no_multiple_windows()
+            self._check_no_multiple_windows(
+                error_case="When using both `segment_start_idxs` and `segment_end_idxs`"
+            )
 
         if segment_start_idxs is None or segment_end_idxs is None:
             assert all(
@@ -1236,7 +1251,9 @@ def reduce(self, feat_cols_to_keep: List[str]) -> FeatureCollection:
             assert all(c.endswith("w=manual") for c in feat_cols_to_keep)
             # As the windows are created manual, the FeatureCollection cannot contain
             # multiple windows for the same output name - input_series combination
-            self._check_no_multiple_windows()
+            self._check_no_multiple_windows(
+                error_case="When reducing a FeatureCollection with manual windows"
+            )
             manual_window = True
         feat_col_fd_mapping: Dict[str, Tuple[str, FeatureDescriptor]] = {}
         for (s_names, window), fd_list in self._feature_desc_dict.items():