feat: add Series|Expr cum_min and cum_max methods (#1384)

narwhals-dev · Nov 16, 2024 · 8ed6e7e · 8ed6e7e
1 parent aba8584
commit 8ed6e7e
Show file tree

Hide file tree

Showing 11 changed files with 389 additions and 3 deletions.
diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md
@@ -12,6 +12,8 @@
         - cast
         - count
         - cum_count
+        - cum_max
+        - cum_min
         - cum_sum
         - diff
         - drop_nulls

diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md
@@ -16,6 +16,8 @@
         - clip
         - count
         - cum_count
+        - cum_max
+        - cum_min
         - cum_sum
         - diff
         - drop_nulls

diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
@@ -433,6 +433,12 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
     def cum_count(self: Self, *, reverse: bool) -> Self:
         return reuse_series_implementation(self, "cum_count", reverse=reverse)
 
+    def cum_min(self: Self, *, reverse: bool) -> Self:
+        # Expression-level cumulative minimum: hands off to
+        # `reuse_series_implementation` with the series method name "cum_min",
+        # forwarding `reverse` unchanged.
+        return reuse_series_implementation(self, "cum_min", reverse=reverse)
+
+    def cum_max(self: Self, *, reverse: bool) -> Self:
+        # Expression-level cumulative maximum: hands off to
+        # `reuse_series_implementation` with the series method name "cum_max",
+        # forwarding `reverse` unchanged.
+        return reuse_series_implementation(self, "cum_max", reverse=reverse)
+
     @property
     def dt(self: Self) -> ArrowExprDateTimeNamespace:
         return ArrowExprDateTimeNamespace(self)

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
@@ -822,6 +822,38 @@ def cum_count(self: Self, *, reverse: bool) -> Self:
             else len(self) - not_na_series.cum_sum() + not_na_series - 1
         )
 
+    def cum_min(self: Self, *, reverse: bool) -> Self:
+        if self._backend_version < (13, 0, 0):
+            msg = "cum_min method is not supported for pyarrow < 13.0.0"
+            raise NotImplementedError(msg)
+
+        import pyarrow.compute as pc  # ignore-banned-import
+
+        native_series = self._native_series
+
+        result = (
+            pc.cumulative_min(native_series, skip_nulls=True)
+            if not reverse
+            else pc.cumulative_min(native_series[::-1], skip_nulls=True)[::-1]
+        )
+        return self._from_native_series(result)
+
+    def cum_max(self: Self, *, reverse: bool) -> Self:
+        if self._backend_version < (13, 0, 0):
+            msg = "cum_max method is not supported for pyarrow < 13.0.0"
+            raise NotImplementedError(msg)
+
+        import pyarrow.compute as pc  # ignore-banned-import
+
+        native_series = self._native_series
+
+        result = (
+            pc.cumulative_max(native_series, skip_nulls=True)
+            if not reverse
+            else pc.cumulative_max(native_series[::-1], skip_nulls=True)[::-1]
+        )
+        return self._from_native_series(result)
+
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()
 

diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
@@ -444,6 +444,12 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
     def cum_count(self: Self, *, reverse: bool) -> Self:
         return reuse_series_implementation(self, "cum_count", reverse=reverse)
 
+    def cum_min(self: Self, *, reverse: bool) -> Self:
+        # Expression-level cumulative minimum: hands off to
+        # `reuse_series_implementation` with the series method name "cum_min",
+        # forwarding `reverse` unchanged.
+        return reuse_series_implementation(self, "cum_min", reverse=reverse)
+
+    def cum_max(self: Self, *, reverse: bool) -> Self:
+        # Expression-level cumulative maximum: hands off to
+        # `reuse_series_implementation` with the series method name "cum_max",
+        # forwarding `reverse` unchanged.
+        return reuse_series_implementation(self, "cum_max", reverse=reverse)
+
     @property
     def str(self: Self) -> PandasLikeExprStringNamespace:
         return PandasLikeExprStringNamespace(self)

diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
@@ -765,6 +765,24 @@ def cum_count(self: Self, *, reverse: bool) -> Self:
         )
         return self._from_native_series(result)
 
+    def cum_min(self: Self, *, reverse: bool) -> Self:
+        native_series = self._native_series
+        result = (
+            native_series.cummin(skipna=True)
+            if not reverse
+            else native_series[::-1].cummin(skipna=True)[::-1]
+        )
+        return self._from_native_series(result)
+
+    def cum_max(self: Self, *, reverse: bool) -> Self:
+        native_series = self._native_series
+        result = (
+            native_series.cummax(skipna=True)
+            if not reverse
+            else native_series[::-1].cummax(skipna=True)[::-1]
+        )
+        return self._from_native_series(result)
+
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._native_series.__iter__()
 

diff --git a/narwhals/expr.py b/narwhals/expr.py
@@ -2747,6 +2747,118 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self:
         """
         return self.__class__(lambda plx: self._call(plx).cum_count(reverse=reverse))
 
+    def cum_min(self: Self, *, reverse: bool = False) -> Self:
+        r"""Return the cumulative min of the non-null values in the column.
+
+        Null values are skipped: they stay null in the output and do not
+        affect the running minimum (see the examples below).
+
+        Arguments:
+            reverse: If True, accumulate from the last element towards the first
+                instead of from the first towards the last.
+
+        Returns:
+            A new expression.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = {"a": [3, 1, None, 2]}
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(df):
+            ...     return df.with_columns(
+            ...         nw.col("a").cum_min().alias("cum_min"),
+            ...         nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"),
+            ...     )
+
+            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+
+            >>> func(pd.DataFrame(data))
+                 a  cum_min  cum_min_reverse
+            0  3.0      3.0              1.0
+            1  1.0      1.0              1.0
+            2  NaN      NaN              NaN
+            3  2.0      1.0              2.0
+
+            >>> func(pl.DataFrame(data))
+            shape: (4, 3)
+            ┌──────┬─────────┬─────────────────┐
+            │ a    ┆ cum_min ┆ cum_min_reverse │
+            │ ---  ┆ ---     ┆ ---             │
+            │ i64  ┆ i64     ┆ i64             │
+            ╞══════╪═════════╪═════════════════╡
+            │ 3    ┆ 3       ┆ 1               │
+            │ 1    ┆ 1       ┆ 1               │
+            │ null ┆ null    ┆ null            │
+            │ 2    ┆ 1       ┆ 2               │
+            └──────┴─────────┴─────────────────┘
+
+            >>> func(pa.table(data))
+            pyarrow.Table
+            a: int64
+            cum_min: int64
+            cum_min_reverse: int64
+            ----
+            a: [[3,1,null,2]]
+            cum_min: [[3,1,null,1]]
+            cum_min_reverse: [[1,1,null,2]]
+        """
+        return self.__class__(lambda plx: self._call(plx).cum_min(reverse=reverse))
+
+    def cum_max(self: Self, *, reverse: bool = False) -> Self:
+        r"""Return the cumulative max of the non-null values in the column.
+
+        Null values are skipped: they stay null in the output and do not
+        affect the running maximum (see the examples below).
+
+        Arguments:
+            reverse: If True, accumulate from the last element towards the first
+                instead of from the first towards the last.
+
+        Returns:
+            A new expression.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = {"a": [1, 3, None, 2]}
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(df):
+            ...     return df.with_columns(
+            ...         nw.col("a").cum_max().alias("cum_max"),
+            ...         nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"),
+            ...     )
+
+            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+
+            >>> func(pd.DataFrame(data))
+                 a  cum_max  cum_max_reverse
+            0  1.0      1.0              3.0
+            1  3.0      3.0              3.0
+            2  NaN      NaN              NaN
+            3  2.0      3.0              2.0
+
+            >>> func(pl.DataFrame(data))
+            shape: (4, 3)
+            ┌──────┬─────────┬─────────────────┐
+            │ a    ┆ cum_max ┆ cum_max_reverse │
+            │ ---  ┆ ---     ┆ ---             │
+            │ i64  ┆ i64     ┆ i64             │
+            ╞══════╪═════════╪═════════════════╡
+            │ 1    ┆ 1       ┆ 3               │
+            │ 3    ┆ 3       ┆ 3               │
+            │ null ┆ null    ┆ null            │
+            │ 2    ┆ 3       ┆ 2               │
+            └──────┴─────────┴─────────────────┘
+
+            >>> func(pa.table(data))
+            pyarrow.Table
+            a: int64
+            cum_max: int64
+            cum_max_reverse: int64
+            ----
+            a: [[1,3,null,2]]
+            cum_max: [[1,3,null,3]]
+            cum_max_reverse: [[3,3,null,2]]
+        """
+        return self.__class__(lambda plx: self._call(plx).cum_max(reverse=reverse))
+
     @property
     def str(self: Self) -> ExprStringNamespace[Self]:
         return ExprStringNamespace(self)

diff --git a/narwhals/series.py b/narwhals/series.py
@@ -2681,6 +2681,110 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self:
             self._compliant_series.cum_count(reverse=reverse)
         )
 
+    def cum_min(self: Self, *, reverse: bool = False) -> Self:
+        r"""Return the cumulative min of the non-null values in the series.
+
+        Null values are skipped: they stay null in the output and do not
+        affect the running minimum (see the examples below).
+
+        Arguments:
+            reverse: If True, accumulate from the last element towards the first
+                instead of from the first towards the last.
+
+        Returns:
+            A new Series holding the cumulative minimum.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = [3, 1, None, 2]
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(s):
+            ...     return s.cum_min()
+
+            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+
+            >>> func(pd.Series(data))
+            0    3.0
+            1    1.0
+            2    NaN
+            3    1.0
+            dtype: float64
+            >>> func(pl.Series(data))  # doctest:+NORMALIZE_WHITESPACE
+            shape: (4,)
+            Series: '' [i64]
+            [
+               3
+               1
+               null
+               1
+            ]
+            >>> func(pa.chunked_array([data]))  # doctest:+ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                3,
+                1,
+                null,
+                1
+              ]
+            ]
+
+        """
+        return self._from_compliant_series(
+            self._compliant_series.cum_min(reverse=reverse)
+        )
+
+    def cum_max(self: Self, *, reverse: bool = False) -> Self:
+        r"""Return the cumulative max of the non-null values in the series.
+
+        Null values are skipped: they stay null in the output and do not
+        affect the running maximum (see the examples below).
+
+        Arguments:
+            reverse: If True, accumulate from the last element towards the first
+                instead of from the first towards the last.
+
+        Returns:
+            A new Series holding the cumulative maximum.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = [1, 3, None, 2]
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(s):
+            ...     return s.cum_max()
+
+            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+
+            >>> func(pd.Series(data))
+            0    1.0
+            1    3.0
+            2    NaN
+            3    3.0
+            dtype: float64
+            >>> func(pl.Series(data))  # doctest:+NORMALIZE_WHITESPACE
+            shape: (4,)
+            Series: '' [i64]
+            [
+               1
+               3
+               null
+               3
+            ]
+            >>> func(pa.chunked_array([data]))  # doctest:+ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                1,
+                3,
+                null,
+                3
+              ]
+            ]
+
+        """
+        return self._from_compliant_series(
+            self._compliant_series.cum_max(reverse=reverse)
+        )
+
     def __iter__(self: Self) -> Iterator[Any]:
         yield from self._compliant_series.__iter__()
 

diff --git a/tests/expr_and_series/cum_count_test.py b/tests/expr_and_series/cum_count_test.py
@@ -7,9 +7,7 @@
 from tests.utils import ConstructorEager
 from tests.utils import assert_equal_data
 
-data = {
-    "a": ["x", "y", None, "z"],
-}
+data = {"a": ["x", "y", None, "z"]}
 
 
 def test_cum_count_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: