Skip to content

Commit

Permalink
feat: add Series|Expr cum_min and cum_max methods (#1384)
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi authored Nov 16, 2024
1 parent aba8584 commit 8ed6e7e
Show file tree
Hide file tree
Showing 11 changed files with 389 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/api-reference/expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
- cast
- count
- cum_count
- cum_max
- cum_min
- cum_sum
- diff
- drop_nulls
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
- clip
- count
- cum_count
- cum_max
- cum_min
- cum_sum
- diff
- drop_nulls
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,12 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
def cum_count(self: Self, *, reverse: bool) -> Self:
    """Build the expression-level ``cum_count``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"cum_count"``; presumably that helper applies the series method of the
    same name to each series this expression yields (helper not visible here).

    Arguments:
        reverse: forwarded unchanged to the series-level method.
    """
    series_method = "cum_count"
    return reuse_series_implementation(self, series_method, reverse=reverse)

def cum_min(self: Self, *, reverse: bool) -> Self:
    """Build the expression-level ``cum_min``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"cum_min"``; presumably that helper applies the series method of the
    same name to each series this expression yields (helper not visible here).

    Arguments:
        reverse: forwarded unchanged to the series-level method.
    """
    series_method = "cum_min"
    return reuse_series_implementation(self, series_method, reverse=reverse)

def cum_max(self: Self, *, reverse: bool) -> Self:
    """Build the expression-level ``cum_max``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"cum_max"``; presumably that helper applies the series method of the
    same name to each series this expression yields (helper not visible here).

    Arguments:
        reverse: forwarded unchanged to the series-level method.
    """
    series_method = "cum_max"
    return reuse_series_implementation(self, series_method, reverse=reverse)

@property
def dt(self: Self) -> ArrowExprDateTimeNamespace:
    """Return an ``ArrowExprDateTimeNamespace`` constructed from this expression."""
    namespace = ArrowExprDateTimeNamespace(self)
    return namespace
Expand Down
32 changes: 32 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,38 @@ def cum_count(self: Self, *, reverse: bool) -> Self:
else len(self) - not_na_series.cum_sum() + not_na_series - 1
)

def cum_min(self: Self, *, reverse: bool) -> Self:
    """Return the cumulative minimum of the series.

    Uses ``pyarrow.compute.cumulative_min`` with ``skip_nulls=True``.

    Arguments:
        reverse: when true, accumulate over the reversed series and reverse
            the result again so it lines up with the input order.

    Raises:
        NotImplementedError: if the backend (pyarrow) version is below 13.0.0.
    """
    if self._backend_version < (13, 0, 0):
        msg = "cum_min method is not supported for pyarrow < 13.0.0"
        raise NotImplementedError(msg)

    import pyarrow.compute as pc  # ignore-banned-import

    values = self._native_series

    if reverse:
        # Flip, accumulate, then flip back to restore the original ordering.
        accumulated = pc.cumulative_min(values[::-1], skip_nulls=True)[::-1]
    else:
        accumulated = pc.cumulative_min(values, skip_nulls=True)
    return self._from_native_series(accumulated)

def cum_max(self: Self, *, reverse: bool) -> Self:
    """Return the cumulative maximum of the series.

    Uses ``pyarrow.compute.cumulative_max`` with ``skip_nulls=True``.

    Arguments:
        reverse: when true, accumulate over the reversed series and reverse
            the result again so it lines up with the input order.

    Raises:
        NotImplementedError: if the backend (pyarrow) version is below 13.0.0.
    """
    if self._backend_version < (13, 0, 0):
        msg = "cum_max method is not supported for pyarrow < 13.0.0"
        raise NotImplementedError(msg)

    import pyarrow.compute as pc  # ignore-banned-import

    values = self._native_series

    if reverse:
        # Flip, accumulate, then flip back to restore the original ordering.
        accumulated = pc.cumulative_max(values[::-1], skip_nulls=True)[::-1]
    else:
        accumulated = pc.cumulative_max(values, skip_nulls=True)
    return self._from_native_series(accumulated)

def __iter__(self: Self) -> Iterator[Any]:
    """Yield every item produced by the native series' own iterator."""
    native_iterator = self._native_series.__iter__()
    yield from native_iterator

Expand Down
6 changes: 6 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,12 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
def cum_count(self: Self, *, reverse: bool) -> Self:
    """Build the expression-level ``cum_count``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"cum_count"``; presumably that helper applies the series method of the
    same name to each series this expression yields (helper not visible here).

    Arguments:
        reverse: forwarded unchanged to the series-level method.
    """
    series_method = "cum_count"
    return reuse_series_implementation(self, series_method, reverse=reverse)

def cum_min(self: Self, *, reverse: bool) -> Self:
    """Build the expression-level ``cum_min``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"cum_min"``; presumably that helper applies the series method of the
    same name to each series this expression yields (helper not visible here).

    Arguments:
        reverse: forwarded unchanged to the series-level method.
    """
    series_method = "cum_min"
    return reuse_series_implementation(self, series_method, reverse=reverse)

def cum_max(self: Self, *, reverse: bool) -> Self:
    """Build the expression-level ``cum_max``.

    Forwards to ``reuse_series_implementation`` with the method name
    ``"cum_max"``; presumably that helper applies the series method of the
    same name to each series this expression yields (helper not visible here).

    Arguments:
        reverse: forwarded unchanged to the series-level method.
    """
    series_method = "cum_max"
    return reuse_series_implementation(self, series_method, reverse=reverse)

@property
def str(self: Self) -> PandasLikeExprStringNamespace:
    """Return a ``PandasLikeExprStringNamespace`` constructed from this expression."""
    namespace = PandasLikeExprStringNamespace(self)
    return namespace
Expand Down
18 changes: 18 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,24 @@ def cum_count(self: Self, *, reverse: bool) -> Self:
)
return self._from_native_series(result)

def cum_min(self: Self, *, reverse: bool) -> Self:
    """Return the cumulative minimum of the series.

    Uses the native ``cummin(skipna=True)``.

    Arguments:
        reverse: when true, accumulate over the reversed series and reverse
            the result again so it lines up with the input order.
    """
    values = self._native_series
    if reverse:
        # Flip, accumulate, then flip back to restore the original ordering.
        accumulated = values[::-1].cummin(skipna=True)[::-1]
    else:
        accumulated = values.cummin(skipna=True)
    return self._from_native_series(accumulated)

def cum_max(self: Self, *, reverse: bool) -> Self:
    """Return the cumulative maximum of the series.

    Uses the native ``cummax(skipna=True)``.

    Arguments:
        reverse: when true, accumulate over the reversed series and reverse
            the result again so it lines up with the input order.
    """
    values = self._native_series
    if reverse:
        # Flip, accumulate, then flip back to restore the original ordering.
        accumulated = values[::-1].cummax(skipna=True)[::-1]
    else:
        accumulated = values.cummax(skipna=True)
    return self._from_native_series(accumulated)

def __iter__(self: Self) -> Iterator[Any]:
    """Yield every item produced by the native series' own iterator."""
    native_iterator = self._native_series.__iter__()
    yield from native_iterator

Expand Down
112 changes: 112 additions & 0 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2747,6 +2747,118 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self:
"""
return self.__class__(lambda plx: self._call(plx).cum_count(reverse=reverse))

def cum_min(self: Self, *, reverse: bool = False) -> Self:
    r"""Return the cumulative min of the non-null values in the column.

    Arguments:
        reverse: if True, accumulate starting from the last row instead of
            the first.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = {"a": [3, 1, None, 2]}

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.with_columns(
        ...         nw.col("a").cum_min().alias("cum_min"),
        ...         nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"),
        ...     )

        We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:

        >>> func(pd.DataFrame(data))
             a  cum_min  cum_min_reverse
        0  3.0      3.0              1.0
        1  1.0      1.0              1.0
        2  NaN      NaN              NaN
        3  2.0      1.0              2.0

        >>> func(pl.DataFrame(data))
        shape: (4, 3)
        ┌──────┬─────────┬─────────────────┐
        │ a    ┆ cum_min ┆ cum_min_reverse │
        │ ---  ┆ ---     ┆ ---             │
        │ i64  ┆ i64     ┆ i64             │
        ╞══════╪═════════╪═════════════════╡
        │ 3    ┆ 3       ┆ 1               │
        │ 1    ┆ 1       ┆ 1               │
        │ null ┆ null    ┆ null            │
        │ 2    ┆ 1       ┆ 2               │
        └──────┴─────────┴─────────────────┘

        >>> func(pa.table(data))
        pyarrow.Table
        a: int64
        cum_min: int64
        cum_min_reverse: int64
        ----
        a: [[3,1,null,2]]
        cum_min: [[3,1,null,1]]
        cum_min_reverse: [[1,1,null,2]]
    """

    def _compliant_cum_min(plx):
        # Deferred: resolved only when the expression is evaluated against a namespace.
        return self._call(plx).cum_min(reverse=reverse)

    return self.__class__(_compliant_cum_min)

def cum_max(self: Self, *, reverse: bool = False) -> Self:
    r"""Return the cumulative max of the non-null values in the column.

    Arguments:
        reverse: if True, accumulate starting from the last row instead of
            the first.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = {"a": [1, 3, None, 2]}

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.with_columns(
        ...         nw.col("a").cum_max().alias("cum_max"),
        ...         nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"),
        ...     )

        We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:

        >>> func(pd.DataFrame(data))
             a  cum_max  cum_max_reverse
        0  1.0      1.0              3.0
        1  3.0      3.0              3.0
        2  NaN      NaN              NaN
        3  2.0      3.0              2.0

        >>> func(pl.DataFrame(data))
        shape: (4, 3)
        ┌──────┬─────────┬─────────────────┐
        │ a    ┆ cum_max ┆ cum_max_reverse │
        │ ---  ┆ ---     ┆ ---             │
        │ i64  ┆ i64     ┆ i64             │
        ╞══════╪═════════╪═════════════════╡
        │ 1    ┆ 1       ┆ 3               │
        │ 3    ┆ 3       ┆ 3               │
        │ null ┆ null    ┆ null            │
        │ 2    ┆ 3       ┆ 2               │
        └──────┴─────────┴─────────────────┘

        >>> func(pa.table(data))
        pyarrow.Table
        a: int64
        cum_max: int64
        cum_max_reverse: int64
        ----
        a: [[1,3,null,2]]
        cum_max: [[1,3,null,3]]
        cum_max_reverse: [[3,3,null,2]]
    """

    def _compliant_cum_max(plx):
        # Deferred: resolved only when the expression is evaluated against a namespace.
        return self._call(plx).cum_max(reverse=reverse)

    return self.__class__(_compliant_cum_max)

@property
def str(self: Self) -> ExprStringNamespace[Self]:
    """Return an ``ExprStringNamespace`` constructed from this expression."""
    namespace = ExprStringNamespace(self)
    return namespace
Expand Down
104 changes: 104 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2681,6 +2681,110 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self:
self._compliant_series.cum_count(reverse=reverse)
)

def cum_min(self: Self, *, reverse: bool = False) -> Self:
    r"""Return the cumulative min of the non-null values in the series.

    Arguments:
        reverse: if True, accumulate starting from the last element instead
            of the first.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = [3, 1, None, 2]

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.cum_min()

        We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:

        >>> func(pd.Series(data))
        0    3.0
        1    1.0
        2    NaN
        3    1.0
        dtype: float64

        >>> func(pl.Series(data))  # doctest:+NORMALIZE_WHITESPACE
        shape: (4,)
        Series: '' [i64]
        [
           3
           1
           null
           1
        ]

        >>> func(pa.chunked_array([data]))  # doctest:+ELLIPSIS
        <pyarrow.lib.ChunkedArray object at ...>
        [
          [
            3,
            1,
            null,
            1
          ]
        ]
    """
    compliant_result = self._compliant_series.cum_min(reverse=reverse)
    return self._from_compliant_series(compliant_result)

def cum_max(self: Self, *, reverse: bool = False) -> Self:
    r"""Return the cumulative max of the non-null values in the series.

    Arguments:
        reverse: if True, accumulate starting from the last element instead
            of the first.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = [1, 3, None, 2]

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.cum_max()

        We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:

        >>> func(pd.Series(data))
        0    1.0
        1    3.0
        2    NaN
        3    3.0
        dtype: float64

        >>> func(pl.Series(data))  # doctest:+NORMALIZE_WHITESPACE
        shape: (4,)
        Series: '' [i64]
        [
           1
           3
           null
           3
        ]

        >>> func(pa.chunked_array([data]))  # doctest:+ELLIPSIS
        <pyarrow.lib.ChunkedArray object at ...>
        [
          [
            1,
            3,
            null,
            3
          ]
        ]
    """
    compliant_result = self._compliant_series.cum_max(reverse=reverse)
    return self._from_compliant_series(compliant_result)

def __iter__(self: Self) -> Iterator[Any]:
    """Yield every item produced by the compliant series' own iterator."""
    compliant_iterator = self._compliant_series.__iter__()
    yield from compliant_iterator

Expand Down
4 changes: 1 addition & 3 deletions tests/expr_and_series/cum_count_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

data = {
"a": ["x", "y", None, "z"],
}
data = {"a": ["x", "y", None, "z"]}


def test_cum_count_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
Expand Down
Loading

0 comments on commit 8ed6e7e

Please sign in to comment.