1169 lines
40 KiB
Python
1169 lines
40 KiB
Python
from __future__ import annotations
|
|
|
|
from collections.abc import Mapping
|
|
from functools import partial
|
|
from operator import methodcaller
|
|
from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Protocol
|
|
|
|
from narwhals._compliant.any_namespace import (
|
|
CatNamespace,
|
|
DateTimeNamespace,
|
|
ListNamespace,
|
|
NameNamespace,
|
|
StringNamespace,
|
|
StructNamespace,
|
|
)
|
|
from narwhals._compliant.namespace import CompliantNamespace
|
|
from narwhals._compliant.typing import (
|
|
AliasName,
|
|
AliasNames,
|
|
CompliantExprT_co,
|
|
CompliantFrameT,
|
|
CompliantLazyFrameT,
|
|
CompliantSeriesOrNativeExprT_co,
|
|
EagerDataFrameT,
|
|
EagerExprT,
|
|
EagerSeriesT,
|
|
LazyExprT,
|
|
NativeExprT,
|
|
)
|
|
from narwhals._utils import _StoresCompliant
|
|
from narwhals.dependencies import get_numpy, is_numpy_array
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Mapping, Sequence
|
|
|
|
from typing_extensions import Self, TypeIs
|
|
|
|
from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
|
|
from narwhals._compliant.series import CompliantSeries
|
|
from narwhals._compliant.typing import AliasNames, EvalNames, EvalSeries, ScalarKwargs
|
|
from narwhals._expression_parsing import ExprKind, ExprMetadata
|
|
from narwhals._utils import Implementation, Version, _LimitedContext
|
|
from narwhals.typing import (
|
|
FillNullStrategy,
|
|
IntoDType,
|
|
NonNestedLiteral,
|
|
NumericLiteral,
|
|
RankMethod,
|
|
RollingInterpolationMethod,
|
|
TemporalLiteral,
|
|
TimeUnit,
|
|
)
|
|
|
|
# Names exported by a star-import of this module.
__all__ = ["CompliantExpr", "DepthTrackingExpr", "EagerExpr", "LazyExpr", "NativeExpr"]
|
|
|
|
|
|
class NativeExpr(Protocol):
    """An `Expr`-like object from a package with [Lazy-only support](https://narwhals-dev.github.io/narwhals/extending/#levels-of-support).

    Protocol members are chosen *purely* for matching statically - as they
    are common to all currently supported packages.
    """

    # Signatures are deliberately permissive (`*args`, `**kwds`, `Any`): only the
    # presence of these two members is used for static matching.
    def between(self, *args: Any, **kwds: Any) -> Any: ...
    def isin(self, *args: Any, **kwds: Any) -> Any: ...
|
|
|
|
|
|
class CompliantExpr(Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co]):
    """Structural interface for expression objects.

    Generic over the frame type the expression is evaluated against and the
    per-output result type: calling the expression with a frame yields a
    sequence of those results (see `__call__`).
    """

    _implementation: Implementation
    _version: Version
    # Maps a frame to the output names of the expression before any aliasing.
    _evaluate_output_names: EvalNames[CompliantFrameT]
    # Optional renaming applied on top of `_evaluate_output_names`.
    _alias_output_names: AliasNames | None
    _metadata: ExprMetadata | None

    def __call__(
        self, df: CompliantFrameT
    ) -> Sequence[CompliantSeriesOrNativeExprT_co]: ...
    def __narwhals_expr__(self) -> None: ...
    def __narwhals_namespace__(self) -> CompliantNamespace[CompliantFrameT, Self]: ...
    @classmethod
    def from_column_names(
        cls,
        evaluate_column_names: EvalNames[CompliantFrameT],
        /,
        *,
        context: _LimitedContext,
    ) -> Self: ...
    @classmethod
    def from_column_indices(
        cls, *column_indices: int, context: _LimitedContext
    ) -> Self: ...
    @staticmethod
    def _eval_names_indices(indices: Sequence[int], /) -> EvalNames[CompliantFrameT]:
        """Build a name evaluator that selects columns by position.

        Arguments:
            indices: positions into the frame's `columns`.

        Returns:
            A function mapping a frame to the column names found at `indices`.
        """

        def fn(df: CompliantFrameT) -> Sequence[str]:
            column_names = df.columns
            return [column_names[i] for i in indices]

        return fn

    def is_null(self) -> Self: ...
    def abs(self) -> Self: ...
    def all(self) -> Self: ...
    def any(self) -> Self: ...
    def alias(self, name: str) -> Self: ...
    def cast(self, dtype: IntoDType) -> Self: ...
    def count(self) -> Self: ...
    def min(self) -> Self: ...
    def max(self) -> Self: ...
    def mean(self) -> Self: ...
    def sum(self) -> Self: ...
    def median(self) -> Self: ...
    def skew(self) -> Self: ...
    def kurtosis(self) -> Self: ...
    def std(self, *, ddof: int) -> Self: ...
    def var(self, *, ddof: int) -> Self: ...
    def n_unique(self) -> Self: ...
    def null_count(self) -> Self: ...
    def drop_nulls(self) -> Self: ...
    def fill_null(
        self,
        value: Self | NonNestedLiteral,
        strategy: FillNullStrategy | None,
        limit: int | None,
    ) -> Self: ...
    def diff(self) -> Self: ...
    def exp(self) -> Self: ...
    def sqrt(self) -> Self: ...
    def unique(self) -> Self: ...
    def len(self) -> Self: ...
    def log(self, base: float) -> Self: ...
    def round(self, decimals: int) -> Self: ...
    def mode(self) -> Self: ...
    def shift(self, n: int) -> Self: ...
    def is_finite(self) -> Self: ...
    def is_nan(self) -> Self: ...
    def is_unique(self) -> Self: ...
    def is_first_distinct(self) -> Self: ...
    def is_last_distinct(self) -> Self: ...
    def cum_sum(self, *, reverse: bool) -> Self: ...
    def cum_count(self, *, reverse: bool) -> Self: ...
    def cum_min(self, *, reverse: bool) -> Self: ...
    def cum_max(self, *, reverse: bool) -> Self: ...
    def cum_prod(self, *, reverse: bool) -> Self: ...
    def is_in(self, other: Any) -> Self: ...
    def rank(self, method: RankMethod, *, descending: bool) -> Self: ...
    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> Self: ...
    def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self: ...
    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> Self: ...
    def map_batches(
        self,
        function: Callable[[CompliantSeries[Any]], CompliantExpr[Any, Any]],
        return_dtype: IntoDType | None,
    ) -> Self: ...

    def clip(
        self,
        lower_bound: Self | NumericLiteral | TemporalLiteral | None,
        upper_bound: Self | NumericLiteral | TemporalLiteral | None,
    ) -> Self: ...

    def ewm_mean(
        self,
        *,
        com: float | None,
        span: float | None,
        half_life: float | None,
        alpha: float | None,
        adjust: bool,
        min_samples: int,
        ignore_nulls: bool,
    ) -> Self: ...

    def rolling_sum(
        self, window_size: int, *, min_samples: int, center: bool
    ) -> Self: ...

    def rolling_mean(
        self, window_size: int, *, min_samples: int, center: bool
    ) -> Self: ...

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self: ...

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self: ...

    def __and__(self, other: Any) -> Self: ...
    def __or__(self, other: Any) -> Self: ...
    def __add__(self, other: Any) -> Self: ...
    def __sub__(self, other: Any) -> Self: ...
    def __mul__(self, other: Any) -> Self: ...
    def __floordiv__(self, other: Any) -> Self: ...
    def __truediv__(self, other: Any) -> Self: ...
    def __mod__(self, other: Any) -> Self: ...
    def __pow__(self, other: Any) -> Self: ...
    def __gt__(self, other: Any) -> Self: ...
    def __ge__(self, other: Any) -> Self: ...
    def __lt__(self, other: Any) -> Self: ...
    def __le__(self, other: Any) -> Self: ...
    def __invert__(self) -> Self: ...
    def broadcast(
        self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]
    ) -> Self: ...
    def _is_multi_output_unnamed(self) -> bool:
        """Return `True` for multi-output aggregations without names.

        For example, column `'a'` only appears in the output as a grouping key:

            df.group_by('a').agg(nw.all().sum())

        It does not get included in:

            nw.all().sum().
        """
        # `_metadata` is declared optional; this method requires it to be set.
        assert self._metadata is not None  # noqa: S101
        return self._metadata.expansion_kind.is_multi_unnamed()

    def _evaluate_aliases(
        self: CompliantExpr[CompliantFrameT, Any], frame: CompliantFrameT, /
    ) -> Sequence[str]:
        """Return the final output names of this expression for `frame`.

        The names from `_evaluate_output_names` are passed through
        `_alias_output_names` when that is set, and returned unchanged otherwise.
        """
        names = self._evaluate_output_names(frame)
        return alias(names) if (alias := self._alias_output_names) else names

    @property
    def str(self) -> StringNamespace[Self]: ...
    @property
    def name(self) -> NameNamespace[Self]: ...
    @property
    def dt(self) -> DateTimeNamespace[Self]: ...
    @property
    def cat(self) -> CatNamespace[Self]: ...
    @property
    def list(self) -> ListNamespace[Self]: ...
    @property
    def struct(self) -> StructNamespace[Self]: ...
|
|
|
|
|
|
class DepthTrackingExpr(
    CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
    Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
):
    """`CompliantExpr` that additionally records a depth and a function name."""

    # Depth of the expression; see `_is_elementary` for how it is interpreted.
    _depth: int
    # Name of the function that produced the expression; shown in `__repr__`.
    _function_name: str

    @classmethod
    def from_column_names(
        cls: type[Self],
        evaluate_column_names: EvalNames[CompliantFrameT],
        /,
        *,
        context: _LimitedContext,
        function_name: str = "",
    ) -> Self: ...

    def _is_elementary(self) -> bool:
        """Check if expr is elementary.

        Examples:
            - nw.col('a').mean()  # depth 1
            - nw.mean('a')  # depth 1
            - nw.len()  # depth 0

        as opposed to, say

            - nw.col('a').filter(nw.col('b')>nw.col('c')).max()

        Elementary expressions are the only ones supported properly in
        pandas, PyArrow, and Dask.
        """
        return self._depth < 2

    def __repr__(self) -> str:  # pragma: no cover
        return f"{type(self).__name__}(depth={self._depth}, function_name={self._function_name})"
|
|
|
|
|
|
class EagerExpr(
    DepthTrackingExpr[EagerDataFrameT, EagerSeriesT],
    Protocol[EagerDataFrameT, EagerSeriesT],
):
    """Expression protocol whose methods are built on the Series implementation.

    Most methods delegate to `_reuse_series` / `_reuse_series_namespace`, which
    wrap the same-named Series method in a new expression.
    """

    # Callable that evaluates the expression against a frame; used by `__call__`.
    _call: EvalSeries[EagerDataFrameT, EagerSeriesT]
    # Non-expressifiable keyword arguments carried along with the expression.
    _scalar_kwargs: ScalarKwargs

    def __init__(
        self,
        call: EvalSeries[EagerDataFrameT, EagerSeriesT],
        *,
        depth: int,
        function_name: str,
        evaluate_output_names: EvalNames[EagerDataFrameT],
        alias_output_names: AliasNames | None,
        implementation: Implementation,
        version: Version,
        scalar_kwargs: ScalarKwargs | None = None,
    ) -> None: ...

    def __call__(self, df: EagerDataFrameT) -> Sequence[EagerSeriesT]:
        """Evaluate the expression against `df` by calling the stored `_call`."""
        return self._call(df)

    def __narwhals_namespace__(
        self,
    ) -> EagerNamespace[EagerDataFrameT, EagerSeriesT, Self, Any, Any]: ...
    def __narwhals_expr__(self) -> None: ...

    @classmethod
    def _from_callable(
        cls,
        func: EvalSeries[EagerDataFrameT, EagerSeriesT],
        *,
        depth: int,
        function_name: str,
        evaluate_output_names: EvalNames[EagerDataFrameT],
        alias_output_names: AliasNames | None,
        context: _LimitedContext,
        scalar_kwargs: ScalarKwargs | None = None,
    ) -> Self:
        """Construct an expression, taking implementation and version from `context`."""
        return cls(
            func,
            depth=depth,
            function_name=function_name,
            evaluate_output_names=evaluate_output_names,
            alias_output_names=alias_output_names,
            implementation=context._implementation,
            version=context._version,
            scalar_kwargs=scalar_kwargs,
        )

    @classmethod
    def _from_series(cls, series: EagerSeriesT) -> Self:
        """Wrap an existing Series in a depth-0 expression that ignores the frame."""
        return cls(
            lambda _df: [series],
            depth=0,
            function_name="series",
            evaluate_output_names=lambda _df: [series.name],
            alias_output_names=None,
            implementation=series._implementation,
            version=series._version,
        )

    def _with_alias_output_names(self, alias_name: AliasName | None, /) -> Self:
        """Return a copy whose outputs are renamed through `alias_name`.

        Arguments:
            alias_name: function applied to each output name, or `None` to clear
                the alias function. When an alias function is already set, the
                new one is applied on top of its result.
        """
        current_alias_output_names = self._alias_output_names
        alias_output_names: AliasNames | None = (
            None
            if alias_name is None
            else (
                lambda output_names: [
                    alias_name(x) for x in current_alias_output_names(output_names)
                ]
            )
            if current_alias_output_names is not None
            else (lambda output_names: [alias_name(x) for x in output_names])
        )

        def func(df: EagerDataFrameT) -> list[EagerSeriesT]:
            if alias_output_names:
                return [
                    series.alias(name)
                    for series, name in zip(
                        self(df), alias_output_names(self._evaluate_output_names(df))
                    )
                ]
            # No alias function: rename each series to its evaluated output name.
            return [
                series.alias(name)
                for series, name in zip(self(df), self._evaluate_output_names(df))
            ]

        return self.__class__(
            func,
            depth=self._depth,
            function_name=self._function_name,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=alias_output_names,
            implementation=self._implementation,
            version=self._version,
            scalar_kwargs=self._scalar_kwargs,
        )

    def _reuse_series(
        self,
        method_name: str,
        *,
        returns_scalar: bool = False,
        scalar_kwargs: ScalarKwargs | None = None,
        **expressifiable_args: Any,
    ) -> Self:
        """Reuse Series implementation for expression.

        If Series.foo is already defined, and we'd like Expr.foo to be the same, we can
        leverage this method to do that for us.

        Arguments:
            method_name: name of method.
            returns_scalar: whether the Series version returns a scalar. In this case,
                the expression version should return a 1-row Series.
            scalar_kwargs: non-expressifiable args which we may need to reuse in `agg` or `over`,
                such as `ddof` for `std` and `var`.
            expressifiable_args: keyword arguments to pass to function, which may
                be expressifiable (e.g. `nw.col('a').is_between(3, nw.col('b')))`).
        """
        func = partial(
            self._reuse_series_inner,
            method_name=method_name,
            returns_scalar=returns_scalar,
            scalar_kwargs=scalar_kwargs or {},
            expressifiable_args=expressifiable_args,
        )
        return self._from_callable(
            func,
            depth=self._depth + 1,
            function_name=f"{self._function_name}->{method_name}",
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            scalar_kwargs=scalar_kwargs,
            context=self,
        )

    # For PyArrow.Series, we return Python Scalars (like Polars does) instead of PyArrow Scalars.
    # However, when working with expressions, we keep everything PyArrow-native.
    def _reuse_series_extra_kwargs(
        self, *, returns_scalar: bool = False
    ) -> dict[str, Any]:
        """Return extra keyword arguments for the reused Series method (none here)."""
        return {}

    @classmethod
    def _is_expr(cls, obj: Self | Any) -> TypeIs[Self]:
        """Return whether `obj` exposes the `__narwhals_expr__` marker."""
        return hasattr(obj, "__narwhals_expr__")

    def _reuse_series_inner(
        self,
        df: EagerDataFrameT,
        *,
        method_name: str,
        returns_scalar: bool,
        scalar_kwargs: ScalarKwargs,
        expressifiable_args: dict[str, Any],
    ) -> Sequence[EagerSeriesT]:
        """Evaluate `method_name` on every series this expression yields for `df`.

        Arguments:
            df: frame to evaluate against.
            method_name: name of the Series method to call.
            returns_scalar: if true, each result is wrapped via `series._from_scalar`.
            scalar_kwargs: keyword arguments forwarded unchanged.
            expressifiable_args: keyword arguments; values that are expressions
                are first evaluated against `df`.

        Raises:
            AssertionError: if the resulting series names differ from the
                expression's evaluated aliases.
        """
        kwargs = {
            **scalar_kwargs,
            **{
                name: df._evaluate_expr(value) if self._is_expr(value) else value
                for name, value in expressifiable_args.items()
            },
        }
        method = methodcaller(
            method_name,
            **self._reuse_series_extra_kwargs(returns_scalar=returns_scalar),
            **kwargs,
        )
        out: Sequence[EagerSeriesT] = [
            series._from_scalar(method(series)) if returns_scalar else method(series)
            for series in self(df)
        ]
        aliases = self._evaluate_aliases(df)
        # Computed once and reused for both the check and the error message.
        series_names = [s.name for s in out]
        if series_names != list(aliases):  # pragma: no cover
            msg = (
                f"Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues\n"
                f"Expression aliases: {aliases}\n"
                f"Series names: {series_names}"
            )
            raise AssertionError(msg)
        return out

    def _reuse_series_namespace(
        self,
        series_namespace: Literal["cat", "dt", "list", "name", "str", "struct"],
        method_name: str,
        **expressifiable_args: Any,
    ) -> Self:
        """Reuse Series implementation for expression.

        Just like `_reuse_series`, but for e.g. `Expr.dt.foo` instead
        of `Expr.foo`.

        Arguments:
            series_namespace: The Series namespace.
            method_name: name of method, within `series_namespace`.
            expressifiable_args: keyword arguments to pass to function, which may
                be expressifiable (e.g. `nw.col('a').str.replace('abc', nw.col('b')))`).
        """

        def inner(df: EagerDataFrameT) -> list[EagerSeriesT]:
            kwargs = {
                name: df._evaluate_expr(value) if self._is_expr(value) else value
                for name, value in expressifiable_args.items()
            }
            return [
                getattr(getattr(series, series_namespace), method_name)(**kwargs)
                for series in self(df)
            ]

        return self._from_callable(
            inner,
            depth=self._depth + 1,
            function_name=f"{self._function_name}->{series_namespace}.{method_name}",
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            scalar_kwargs=self._scalar_kwargs,
            context=self,
        )

    def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
        # Mark the resulting Series with `_broadcast = True`.
        # Then, when extracting native objects, `extract_native` will
        # know what to do.
        def func(df: EagerDataFrameT) -> list[EagerSeriesT]:
            results = []
            for result in self(df):
                result._broadcast = True
                results.append(result)
            return results

        return type(self)(
            func,
            depth=self._depth,
            function_name=self._function_name,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            implementation=self._implementation,
            version=self._version,
            scalar_kwargs=self._scalar_kwargs,
        )

    def cast(self, dtype: IntoDType) -> Self:
        return self._reuse_series("cast", dtype=dtype)

    def __eq__(self, other: Self | Any) -> Self:  # type: ignore[override]
        return self._reuse_series("__eq__", other=other)

    def __ne__(self, other: Self | Any) -> Self:  # type: ignore[override]
        return self._reuse_series("__ne__", other=other)

    def __ge__(self, other: Self | Any) -> Self:
        return self._reuse_series("__ge__", other=other)

    def __gt__(self, other: Self | Any) -> Self:
        return self._reuse_series("__gt__", other=other)

    def __le__(self, other: Self | Any) -> Self:
        return self._reuse_series("__le__", other=other)

    def __lt__(self, other: Self | Any) -> Self:
        return self._reuse_series("__lt__", other=other)

    def __and__(self, other: Self | bool | Any) -> Self:
        return self._reuse_series("__and__", other=other)

    def __or__(self, other: Self | bool | Any) -> Self:
        return self._reuse_series("__or__", other=other)

    def __add__(self, other: Self | Any) -> Self:
        return self._reuse_series("__add__", other=other)

    def __sub__(self, other: Self | Any) -> Self:
        return self._reuse_series("__sub__", other=other)

    # The reflected operators below first alias the expression to "literal".
    def __rsub__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rsub__", other=other)

    def __mul__(self, other: Self | Any) -> Self:
        return self._reuse_series("__mul__", other=other)

    def __truediv__(self, other: Self | Any) -> Self:
        return self._reuse_series("__truediv__", other=other)

    def __rtruediv__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rtruediv__", other=other)

    def __floordiv__(self, other: Self | Any) -> Self:
        return self._reuse_series("__floordiv__", other=other)

    def __rfloordiv__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rfloordiv__", other=other)

    def __pow__(self, other: Self | Any) -> Self:
        return self._reuse_series("__pow__", other=other)

    def __rpow__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rpow__", other=other)

    def __mod__(self, other: Self | Any) -> Self:
        return self._reuse_series("__mod__", other=other)

    def __rmod__(self, other: Self | Any) -> Self:
        return self.alias("literal")._reuse_series("__rmod__", other=other)

    # Unary
    def __invert__(self) -> Self:
        return self._reuse_series("__invert__")

    # Reductions
    def null_count(self) -> Self:
        return self._reuse_series("null_count", returns_scalar=True)

    def n_unique(self) -> Self:
        return self._reuse_series("n_unique", returns_scalar=True)

    def sum(self) -> Self:
        return self._reuse_series("sum", returns_scalar=True)

    def count(self) -> Self:
        return self._reuse_series("count", returns_scalar=True)

    def mean(self) -> Self:
        return self._reuse_series("mean", returns_scalar=True)

    def median(self) -> Self:
        return self._reuse_series("median", returns_scalar=True)

    def std(self, *, ddof: int) -> Self:
        return self._reuse_series(
            "std", returns_scalar=True, scalar_kwargs={"ddof": ddof}
        )

    def var(self, *, ddof: int) -> Self:
        return self._reuse_series(
            "var", returns_scalar=True, scalar_kwargs={"ddof": ddof}
        )

    def skew(self) -> Self:
        return self._reuse_series("skew", returns_scalar=True)

    def kurtosis(self) -> Self:
        return self._reuse_series("kurtosis", returns_scalar=True)

    def any(self) -> Self:
        return self._reuse_series("any", returns_scalar=True)

    def all(self) -> Self:
        return self._reuse_series("all", returns_scalar=True)

    def max(self) -> Self:
        return self._reuse_series("max", returns_scalar=True)

    def min(self) -> Self:
        return self._reuse_series("min", returns_scalar=True)

    def arg_min(self) -> Self:
        return self._reuse_series("arg_min", returns_scalar=True)

    def arg_max(self) -> Self:
        return self._reuse_series("arg_max", returns_scalar=True)

    # Other

    def clip(
        self,
        lower_bound: Self | NumericLiteral | TemporalLiteral | None,
        upper_bound: Self | NumericLiteral | TemporalLiteral | None,
    ) -> Self:
        return self._reuse_series(
            "clip", lower_bound=lower_bound, upper_bound=upper_bound
        )

    def is_null(self) -> Self:
        return self._reuse_series("is_null")

    def is_nan(self) -> Self:
        return self._reuse_series("is_nan")

    def fill_null(
        self,
        value: Self | NonNestedLiteral,
        strategy: FillNullStrategy | None,
        limit: int | None,
    ) -> Self:
        return self._reuse_series(
            "fill_null", value=value, scalar_kwargs={"strategy": strategy, "limit": limit}
        )

    def is_in(self, other: Any) -> Self:
        return self._reuse_series("is_in", other=other)

    def arg_true(self) -> Self:
        return self._reuse_series("arg_true")

    def filter(self, *predicates: Self) -> Self:
        # Combine all predicates into a single one before delegating.
        plx = self.__narwhals_namespace__()
        predicate = plx.all_horizontal(*predicates, ignore_nulls=False)
        return self._reuse_series("filter", predicate=predicate)

    def drop_nulls(self) -> Self:
        return self._reuse_series("drop_nulls")

    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> Self:
        return self._reuse_series(
            "replace_strict", old=old, new=new, return_dtype=return_dtype
        )

    def sort(self, *, descending: bool, nulls_last: bool) -> Self:
        return self._reuse_series("sort", descending=descending, nulls_last=nulls_last)

    def abs(self) -> Self:
        return self._reuse_series("abs")

    def unique(self) -> Self:
        return self._reuse_series("unique", maintain_order=False)

    def diff(self) -> Self:
        return self._reuse_series("diff")

    def sample(
        self,
        n: int | None,
        *,
        fraction: float | None,
        with_replacement: bool,
        seed: int | None,
    ) -> Self:
        return self._reuse_series(
            "sample", n=n, fraction=fraction, with_replacement=with_replacement, seed=seed
        )

    def alias(self, name: str) -> Self:
        """Rename the single output of this expression to `name`.

        The returned alias function raises `ValueError` when it is given
        anything other than exactly one output name.
        """

        def alias_output_names(names: Sequence[str]) -> Sequence[str]:
            if len(names) != 1:
                msg = f"Expected function with single output, found output names: {names}"
                raise ValueError(msg)
            return [name]

        # Define this one manually, so that we can
        # override `output_names` and not increase depth
        return type(self)(
            lambda df: [series.alias(name) for series in self(df)],
            depth=self._depth,
            function_name=self._function_name,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=alias_output_names,
            implementation=self._implementation,
            version=self._version,
            scalar_kwargs=self._scalar_kwargs,
        )

    def is_unique(self) -> Self:
        return self._reuse_series("is_unique")

    def is_first_distinct(self) -> Self:
        return self._reuse_series("is_first_distinct")

    def is_last_distinct(self) -> Self:
        return self._reuse_series("is_last_distinct")

    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> Self:
        return self._reuse_series(
            "quantile",
            returns_scalar=True,
            scalar_kwargs={"quantile": quantile, "interpolation": interpolation},
        )

    def head(self, n: int) -> Self:
        return self._reuse_series("head", scalar_kwargs={"n": n})

    def tail(self, n: int) -> Self:
        return self._reuse_series("tail", scalar_kwargs={"n": n})

    def round(self, decimals: int) -> Self:
        return self._reuse_series("round", decimals=decimals)

    def len(self) -> Self:
        return self._reuse_series("len", returns_scalar=True)

    def gather_every(self, n: int, offset: int) -> Self:
        return self._reuse_series("gather_every", n=n, offset=offset)

    def mode(self) -> Self:
        return self._reuse_series("mode")

    def is_finite(self) -> Self:
        return self._reuse_series("is_finite")

    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._reuse_series(
            "rolling_mean",
            scalar_kwargs={
                "window_size": window_size,
                "min_samples": min_samples,
                "center": center,
            },
        )

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._reuse_series(
            "rolling_std",
            scalar_kwargs={
                "window_size": window_size,
                "min_samples": min_samples,
                "center": center,
                "ddof": ddof,
            },
        )

    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._reuse_series(
            "rolling_sum",
            scalar_kwargs={
                "window_size": window_size,
                "min_samples": min_samples,
                "center": center,
            },
        )

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._reuse_series(
            "rolling_var",
            scalar_kwargs={
                "window_size": window_size,
                "min_samples": min_samples,
                "center": center,
                "ddof": ddof,
            },
        )

    def map_batches(
        self, function: Callable[[Any], Any], return_dtype: IntoDType | None
    ) -> Self:
        """Apply `function` to each series produced by this expression.

        Arguments:
            function: callable applied to every input series.
            return_dtype: if not `None`, every result is cast to this dtype.

        NumPy arrays / NumPy scalars returned by `function` are converted back
        to series (via the namespace's `from_numpy`) and given the name of the
        corresponding input series. The decision is made from the first result.
        """

        def func(df: EagerDataFrameT) -> Sequence[EagerSeriesT]:
            input_series_list = self(df)
            output_names = [input_series.name for input_series in input_series_list]
            result = [function(series) for series in input_series_list]
            # Guard against an expression that yields no series: there is no
            # first element to inspect, and nothing to convert.
            if result and (
                is_numpy_array(result[0])
                or ((np := get_numpy()) is not None and np.isscalar(result[0]))
            ):
                from_numpy = partial(
                    self.__narwhals_namespace__()._series.from_numpy, context=self
                )
                result = [
                    from_numpy(array).alias(output_name)
                    for array, output_name in zip(result, output_names)
                ]
            if return_dtype is not None:
                result = [series.cast(return_dtype) for series in result]
            return result

        return self._from_callable(
            func,
            depth=self._depth + 1,
            function_name=self._function_name + "->map_batches",
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            context=self,
        )

    def shift(self, n: int) -> Self:
        return self._reuse_series("shift", scalar_kwargs={"n": n})

    def cum_sum(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_sum", scalar_kwargs={"reverse": reverse})

    def cum_count(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_count", scalar_kwargs={"reverse": reverse})

    def cum_min(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_min", scalar_kwargs={"reverse": reverse})

    def cum_max(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_max", scalar_kwargs={"reverse": reverse})

    def cum_prod(self, *, reverse: bool) -> Self:
        return self._reuse_series("cum_prod", scalar_kwargs={"reverse": reverse})

    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        return self._reuse_series(
            "rank", scalar_kwargs={"method": method, "descending": descending}
        )

    def log(self, base: float) -> Self:
        return self._reuse_series("log", base=base)

    def exp(self) -> Self:
        return self._reuse_series("exp")

    def sqrt(self) -> Self:
        return self._reuse_series("sqrt")

    @property
    def cat(self) -> EagerExprCatNamespace[Self]:
        return EagerExprCatNamespace(self)

    @property
    def dt(self) -> EagerExprDateTimeNamespace[Self]:
        return EagerExprDateTimeNamespace(self)

    @property
    def list(self) -> EagerExprListNamespace[Self]:
        return EagerExprListNamespace(self)

    @property
    def name(self) -> EagerExprNameNamespace[Self]:
        return EagerExprNameNamespace(self)

    @property
    def str(self) -> EagerExprStringNamespace[Self]:
        return EagerExprStringNamespace(self)

    @property
    def struct(self) -> EagerExprStructNamespace[Self]:
        return EagerExprStructNamespace(self)
|
|
|
|
|
|
# mypy thinks `NativeExprT` should be covariant, pyright thinks it should be invariant
|
|
class LazyExpr(  # type: ignore[misc]
    CompliantExpr[CompliantLazyFrameT, NativeExprT],
    Protocol[CompliantLazyFrameT, NativeExprT],
):
    """`CompliantExpr` evaluated against a lazy frame, yielding native expressions."""

    def _with_alias_output_names(self, func: AliasNames | None, /) -> Self: ...
    def alias(self, name: str) -> Self:
        """Rename the single output of this expression to `name`.

        The alias function handed to `_with_alias_output_names` raises
        `ValueError` when it is given anything other than exactly one name.
        """

        def fn(names: Sequence[str]) -> Sequence[str]:
            if len(names) != 1:
                msg = f"Expected function with single output, found output names: {names}"
                raise ValueError(msg)
            return [name]

        return self._with_alias_output_names(fn)

    @property
    def name(self) -> LazyExprNameNamespace[Self]:
        return LazyExprNameNamespace(self)
|
|
|
|
|
|
class _ExprNamespace(  # type: ignore[misc]
    _StoresCompliant[CompliantExprT_co], Protocol[CompliantExprT_co]
):
    """Base for expression namespaces: holds the expression the namespace acts on."""

    _compliant_expr: CompliantExprT_co

    @property
    def compliant(self) -> CompliantExprT_co:
        """The stored compliant expression."""
        return self._compliant_expr
|
|
|
|
|
|
class EagerExprNamespace(_ExprNamespace[EagerExprT], Generic[EagerExprT]):
    """Concrete namespace base that stores an eager expression."""

    def __init__(self, expr: EagerExprT, /) -> None:
        self._compliant_expr = expr
|
|
|
|
|
|
class LazyExprNamespace(_ExprNamespace[LazyExprT], Generic[LazyExprT]):
    """Concrete namespace base that stores a lazy expression."""

    def __init__(self, expr: LazyExprT, /) -> None:
        self._compliant_expr = expr
|
|
|
|
|
|
class EagerExprCatNamespace(
    EagerExprNamespace[EagerExprT], CatNamespace[EagerExprT], Generic[EagerExprT]
):
    """`cat` namespace for eager expressions, delegating to the Series `cat` namespace."""

    def get_categories(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("cat", "get_categories")
|
|
|
|
|
|
class EagerExprDateTimeNamespace(
    EagerExprNamespace[EagerExprT], DateTimeNamespace[EagerExprT], Generic[EagerExprT]
):
    """`dt` namespace for eager expressions.

    Every method forwards to the same-named method of the Series `dt`
    namespace through `_reuse_series_namespace`.
    """

    def to_string(self, format: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "to_string", format=format)

    def replace_time_zone(self, time_zone: str | None) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "dt", "replace_time_zone", time_zone=time_zone
        )

    def convert_time_zone(self, time_zone: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "dt", "convert_time_zone", time_zone=time_zone
        )

    def timestamp(self, time_unit: TimeUnit) -> EagerExprT:
        return self.compliant._reuse_series_namespace(
            "dt", "timestamp", time_unit=time_unit
        )

    def date(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "date")

    def year(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "year")

    def month(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "month")

    def day(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "day")

    def hour(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "hour")

    def minute(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "minute")

    def second(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "second")

    def millisecond(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "millisecond")

    def microsecond(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "microsecond")

    def nanosecond(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "nanosecond")

    def ordinal_day(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "ordinal_day")

    def weekday(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "weekday")

    def total_minutes(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_minutes")

    def total_seconds(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_seconds")

    def total_milliseconds(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_milliseconds")

    def total_microseconds(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_microseconds")

    def total_nanoseconds(self) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "total_nanoseconds")

    def truncate(self, every: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "truncate", every=every)

    def offset_by(self, by: str) -> EagerExprT:
        return self.compliant._reuse_series_namespace("dt", "offset_by", by=by)
|
|
|
|
|
|
class EagerExprListNamespace(
    EagerExprNamespace[EagerExprT], ListNamespace[EagerExprT], Generic[EagerExprT]
):
    """`list` namespace for eager expressions.

    Operations are forwarded to `_reuse_series_namespace` on the wrapped
    expression, under the `"list"` namespace name.
    """

    def len(self) -> EagerExprT:
        """Forward `list.len` through `_reuse_series_namespace`."""
        expr = self.compliant
        return expr._reuse_series_namespace("list", "len")
|
|
|
|
|
|
class CompliantExprNameNamespace(  # type: ignore[misc]
    _ExprNamespace[CompliantExprT_co],
    NameNamespace[CompliantExprT_co],
    Protocol[CompliantExprT_co],
):
    """Protocol expressing the `name` operations in terms of `_from_callable`.

    Each public method selects or builds a single-name renaming function and
    passes it to `_from_callable`, which implementers must supply.
    """

    def keep(self) -> CompliantExprT_co:
        """Call `_from_callable` with `None` instead of a renaming function."""
        return self._from_callable(None)

    def map(self, function: AliasName) -> CompliantExprT_co:
        """Pass the caller-supplied `function` straight to `_from_callable`."""
        return self._from_callable(function)

    def prefix(self, prefix: str) -> CompliantExprT_co:
        """Rename with a function that puts `prefix` in front of each name."""

        def add_prefix(name: str) -> str:
            return f"{prefix}{name}"

        return self._from_callable(add_prefix)

    def suffix(self, suffix: str) -> CompliantExprT_co:
        """Rename with a function that appends `suffix` to each name."""

        def add_suffix(name: str) -> str:
            return f"{name}{suffix}"

        return self._from_callable(add_suffix)

    def to_lowercase(self) -> CompliantExprT_co:
        """Rename using `str.lower`."""
        return self._from_callable(str.lower)

    def to_uppercase(self) -> CompliantExprT_co:
        """Rename using `str.upper`."""
        return self._from_callable(str.upper)

    @staticmethod
    def _alias_output_names(func: AliasName, /) -> AliasNames:
        """Lift a single-name function into one that maps a sequence of names.

        The returned callable applies `func` to every element of the sequence
        it receives and returns the results as a list, in the same order.
        """

        def rename_all(output_names: Sequence[str], /) -> Sequence[str]:
            return list(map(func, output_names))

        return rename_all

    def _from_callable(self, func: AliasName | None, /) -> CompliantExprT_co:
        """Build the resulting expression from `func`; provided by implementers."""
        ...
|
|
|
|
|
|
class EagerExprNameNamespace(
    EagerExprNamespace[EagerExprT],
    CompliantExprNameNamespace[EagerExprT],
    Generic[EagerExprT],
):
    """`name` namespace for eager expressions."""

    def _from_callable(self, func: AliasName | None) -> EagerExprT:
        """Pass `func` unchanged to the expression's `_with_alias_output_names`."""
        return self.compliant._with_alias_output_names(func)
|
|
|
|
|
|
class LazyExprNameNamespace(
    LazyExprNamespace[LazyExprT],
    CompliantExprNameNamespace[LazyExprT],
    Generic[LazyExprT],
):
    """`name` namespace for lazy expressions."""

    def _from_callable(self, func: AliasName | None) -> LazyExprT:
        """Apply `func` to the wrapped expression's output names.

        Arguments:
            func: Single-name renaming function, or `None`.

        Returns:
            The result of the expression's `_with_alias_output_names`, called
            with `func` lifted by `_alias_output_names` to operate on a
            sequence of names, or with `None` when `func` is `None`.
        """
        expr = self.compliant
        # Compare against `None` explicitly rather than relying on truthiness:
        # a callable object may define `__bool__`/`__len__` and evaluate as
        # falsy, in which case the requested rename must still be applied.
        output_names = None if func is None else self._alias_output_names(func)
        return expr._with_alias_output_names(output_names)
|
|
|
|
|
|
class EagerExprStringNamespace(
    EagerExprNamespace[EagerExprT], StringNamespace[EagerExprT], Generic[EagerExprT]
):
    """`str` namespace for eager expressions.

    Every public method forwards its own name, plus its arguments as keyword
    arguments, to `_reuse_series_namespace` on the wrapped expression under
    the `"str"` namespace name.
    """

    def _reuse_str(self, method_name: str, /, **kwargs: Any) -> EagerExprT:
        """Call `_reuse_series_namespace("str", method_name, **kwargs)`."""
        return self.compliant._reuse_series_namespace("str", method_name, **kwargs)

    def len_chars(self) -> EagerExprT:
        return self._reuse_str("len_chars")

    # --- replacement and trimming ------------------------------------------

    def replace(self, pattern: str, value: str, *, literal: bool, n: int) -> EagerExprT:
        return self._reuse_str(
            "replace", pattern=pattern, value=value, literal=literal, n=n
        )

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> EagerExprT:
        return self._reuse_str(
            "replace_all", pattern=pattern, value=value, literal=literal
        )

    def strip_chars(self, characters: str | None) -> EagerExprT:
        return self._reuse_str("strip_chars", characters=characters)

    # --- predicates --------------------------------------------------------

    def starts_with(self, prefix: str) -> EagerExprT:
        return self._reuse_str("starts_with", prefix=prefix)

    def ends_with(self, suffix: str) -> EagerExprT:
        return self._reuse_str("ends_with", suffix=suffix)

    def contains(self, pattern: str, *, literal: bool) -> EagerExprT:
        return self._reuse_str("contains", pattern=pattern, literal=literal)

    # --- slicing and splitting ---------------------------------------------

    def slice(self, offset: int, length: int | None) -> EagerExprT:
        return self._reuse_str("slice", offset=offset, length=length)

    def split(self, by: str) -> EagerExprT:
        return self._reuse_str("split", by=by)

    # --- parsing -----------------------------------------------------------

    def to_datetime(self, format: str | None) -> EagerExprT:
        return self._reuse_str("to_datetime", format=format)

    def to_date(self, format: str | None) -> EagerExprT:
        return self._reuse_str("to_date", format=format)

    # --- case and padding --------------------------------------------------

    def to_lowercase(self) -> EagerExprT:
        return self._reuse_str("to_lowercase")

    def to_uppercase(self) -> EagerExprT:
        return self._reuse_str("to_uppercase")

    def zfill(self, width: int) -> EagerExprT:
        return self._reuse_str("zfill", width=width)
|
|
|
|
|
|
class EagerExprStructNamespace(
    EagerExprNamespace[EagerExprT], StructNamespace[EagerExprT], Generic[EagerExprT]
):
    """`struct` namespace for eager expressions."""

    def field(self, name: str) -> EagerExprT:
        """Forward `struct.field` for `name`, then alias the result to `name`."""
        expr = self.compliant
        selected = expr._reuse_series_namespace("struct", "field", name=name)
        return selected.alias(name)
|