diff --git a/tests/test_features_feature_collection.py b/tests/test_features_feature_collection.py
index 7466440d..1c491915 100644
--- a/tests/test_features_feature_collection.py
+++ b/tests/test_features_feature_collection.py
@@ -475,6 +475,20 @@ def test_group_by_consecutive_subcall():
     assert_frame_equal(res, expected_df)
 
 
+@pytest.mark.parametrize("group_by", ["group_by_all", "group_by_consecutive"])
+def test_groupby_multiple_window_sizes_error(dummy_group_data, group_by):
+    fc = FeatureCollection(
+        MultipleFeatureDescriptors(
+            functions=[np.sum, np.min],
+            series_names=["number_sold"],
+            windows=["5D", "10D"],
+        )
+    )
+
+    with pytest.raises(Exception):
+        fc.calculate(dummy_group_data, return_df=True, **{group_by: "store"})
+
+
 def test_single_series_feature_collection(dummy_data):
     fd = FeatureDescriptor(
         function=np.sum,
@@ -961,6 +975,41 @@ def test_time_segment_start_and_end_idxs_empty_array():
     assert np.all(res["dummy__len__w=manual"] == [])
 
 
+def test_sequence_segment_start_and_end_idxs_no_multiple_windows():
+    s = pd.Series(np.arange(20), name="dummy")
+    segment_start_idxs = [0, 5, 3, 3]
+    segment_end_idxs = [5, 10, 8, 5]
+
+    fc = FeatureCollection(MultipleFeatureDescriptors([np.min], "dummy", [3, 5]))
+    _ = fc.calculate(s, stride=5)
+    _ = fc.calculate(s, segment_start_idxs=segment_start_idxs)
+    _ = fc.calculate(s, segment_end_idxs=segment_end_idxs)
+
+    with pytest.raises(Exception):
+        # Should only fail when both are provided
+        _ = fc.calculate(
+            s, segment_start_idxs=segment_start_idxs, segment_end_idxs=segment_end_idxs
+        )
+
+
+def test_time_segment_start_and_end_idxs_no_multiple_windows():
+    s = pd.Series(np.arange(20), name="dummy")
+    s.index = pd.date_range("2021-08-09", freq="1h", periods=20)
+    segment_start_idxs = s.index[[0, 5, 3, 3]]
+    segment_end_idxs = s.index[[5, 10, 8, 5]]
+
+    fc = FeatureCollection(MultipleFeatureDescriptors([np.min], "dummy", ["3h", "5h"]))
+    _ = fc.calculate(s, stride="5h")
+    _ = fc.calculate(s, segment_start_idxs=segment_start_idxs)
+    _ = fc.calculate(s, segment_end_idxs=segment_end_idxs)
+
+    with pytest.raises(Exception):
+        # Should only fail when both are provided
+        _ = fc.calculate(
+            s, segment_start_idxs=segment_start_idxs, segment_end_idxs=segment_end_idxs
+        )
+
+
 def test_sequence_segment_start_or_end_idxs_of_wrong_dtype():
     s = pd.Series(np.arange(20), name="dummy")
     wrong_segment_idx = pd.date_range("2021-08-09", freq="1h", periods=20)[5:9]
diff --git a/tsflex/features/feature_collection.py b/tsflex/features/feature_collection.py
index 3342db18..a011ea5c 100644
--- a/tsflex/features/feature_collection.py
+++ b/tsflex/features/feature_collection.py
@@ -418,13 +418,22 @@ def get_stroll_function(idx) -> Tuple[StridedRolling, FuncWrapper]:
 
         return get_stroll_function
 
-    def _check_no_multiple_windows(self):
+    def _check_no_multiple_windows(self, error_case: str):
+        """Check whether there are no multiple windows in the feature collection.
+
+        Parameters
+        ----------
+        error_case : str
+            The case in which no multiple windows are allowed.
+
+        """
         assert (
             self._get_nb_output_features_without_window()
             == self.get_nb_output_features()
         ), (
-            "When using `segment_XXX_idxs`; each output name - series_input combination"
-            + " can only have 1 window (or None)"
+            error_case
+            + "; each output name - series_input combination can only have 1 window"
+            + " (or None)"
         )
 
     def _data_to_series_dict(
@@ -1039,6 +1048,10 @@ def calculate(
             or group_by_consecutive
             or isinstance(data, pd.core.groupby.DataFrameGroupBy)
         ):
+            self._check_no_multiple_windows(
+                error_case="When using the groupby behavior"
+            )
+
             # The grouping column must be part of the required series
             if group_by_all:
                 # group_by_consecutive should be None (checked by asserts above)
@@ -1117,7 +1130,9 @@ def calculate(
             _check_start_end_array(segment_start_idxs, segment_end_idxs)
             # Check if there is either 1 or No(ne) window value for every output name -
             # input_series combination
-            self._check_no_multiple_windows()
+            self._check_no_multiple_windows(
+                error_case="When using both `segment_start_idxs` and `segment_end_idxs`"
+            )
 
         if segment_start_idxs is None or segment_end_idxs is None:
             assert all(
@@ -1236,7 +1251,9 @@ def reduce(self, feat_cols_to_keep: List[str]) -> FeatureCollection:
             assert all(c.endswith("w=manual") for c in feat_cols_to_keep)
             # As the windows are created manual, the FeatureCollection cannot contain
             # multiple windows for the same output name - input_series combination
-            self._check_no_multiple_windows()
+            self._check_no_multiple_windows(
+                error_case="When reducing a FeatureCollection with manual windows"
+            )
             manual_window = True
         feat_col_fd_mapping: Dict[str, Tuple[str, FeatureDescriptor]] = {}
         for (s_names, window), fd_list in self._feature_desc_dict.items():