516 lines
17 KiB
Python
516 lines
17 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING, Any, Generic, Literal, Protocol
|
|
|
|
from narwhals._compliant.any_namespace import (
|
|
CatNamespace,
|
|
DateTimeNamespace,
|
|
ListNamespace,
|
|
StringNamespace,
|
|
StructNamespace,
|
|
)
|
|
from narwhals._compliant.typing import (
|
|
CompliantSeriesT_co,
|
|
EagerDataFrameAny,
|
|
EagerSeriesT_co,
|
|
NativeSeriesT,
|
|
NativeSeriesT_co,
|
|
)
|
|
from narwhals._translate import FromIterable, FromNative, NumpyConvertible, ToNarwhals
|
|
from narwhals._typing_compat import TypeVar, assert_never
|
|
from narwhals._utils import (
|
|
_StoresCompliant,
|
|
_StoresNative,
|
|
is_compliant_series,
|
|
is_sized_multi_index_selector,
|
|
unstable,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Iterable, Iterator, Mapping, Sequence
|
|
from types import ModuleType
|
|
|
|
import pandas as pd
|
|
import polars as pl
|
|
import pyarrow as pa
|
|
from typing_extensions import NotRequired, Self, TypedDict
|
|
|
|
from narwhals._compliant.dataframe import CompliantDataFrame
|
|
from narwhals._compliant.expr import CompliantExpr, EagerExpr
|
|
from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
|
|
from narwhals._utils import Implementation, Version, _LimitedContext
|
|
from narwhals.dtypes import DType
|
|
from narwhals.series import Series
|
|
from narwhals.typing import (
|
|
ClosedInterval,
|
|
FillNullStrategy,
|
|
Into1DArray,
|
|
IntoDType,
|
|
MultiIndexSelector,
|
|
NonNestedLiteral,
|
|
NumericLiteral,
|
|
RankMethod,
|
|
RollingInterpolationMethod,
|
|
SizedMultiIndexSelector,
|
|
TemporalLiteral,
|
|
_1DArray,
|
|
_SliceIndex,
|
|
)
|
|
|
|
class HistData(TypedDict, Generic[NativeSeriesT, "_CountsT_co"]):
    """Typed mapping carrying histogram results.

    `count` is always present; `breakpoint` is optional (`NotRequired`).
    """

    # NOTE(review): `TypedDict` / `NotRequired` are only imported under
    # `if TYPE_CHECKING:`, so this class presumably lives inside that guard;
    # confirm the nesting when restoring the file's indentation.
    breakpoint: NotRequired[list[float] | _1DArray | list[Any]]
    count: NativeSeriesT | _1DArray | _CountsT_co | list[Any]
|
|
|
|
|
|
# Covariant type variable for the container of histogram counts.
# `HistData` refers to it through the string forward reference "_CountsT_co".
_CountsT_co = TypeVar(
    "_CountsT_co",
    bound="Iterable[Any]",
    covariant=True,
)
|
|
|
|
# Names exported by `from <this module> import *`.
__all__ = [
    "CompliantSeries",
    "EagerSeries",
    "EagerSeriesCatNamespace",
    "EagerSeriesDateTimeNamespace",
    "EagerSeriesHist",
    "EagerSeriesListNamespace",
    "EagerSeriesNamespace",
    "EagerSeriesStringNamespace",
    "EagerSeriesStructNamespace",
]
|
|
|
|
|
|
class CompliantSeries(
    NumpyConvertible["_1DArray", "Into1DArray"],
    FromIterable,
    FromNative[NativeSeriesT],
    ToNarwhals["Series[NativeSeriesT]"],
    Protocol[NativeSeriesT],
):
    """Structural interface for a series object wrapping a native series.

    The protocol is generic over `NativeSeriesT`, the type returned by
    `native`. Almost every member is a signature-only stub; the only methods
    with a body here are `__narwhals_series__`, `__len__` and `to_narwhals`.
    """

    _implementation: Implementation
    _version: Version

    # --- Wrapped data and metadata ---
    @property
    def dtype(self) -> DType: ...

    @property
    def name(self) -> str: ...

    @property
    def native(self) -> NativeSeriesT: ...

    # --- Dunder hooks ---
    def __narwhals_series__(self) -> Self:
        """Return `self`."""
        return self

    def __narwhals_namespace__(self) -> CompliantNamespace[Any, Any]: ...

    def __native_namespace__(self) -> ModuleType: ...

    def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray: ...

    def __contains__(self, other: Any) -> bool: ...

    def __getitem__(self, item: MultiIndexSelector[Self]) -> Any: ...

    def __iter__(self) -> Iterator[Any]: ...

    def __len__(self) -> int:
        """Return `len()` of the wrapped native series."""
        return len(self.native)

    # --- Re-wrapping helpers ---
    def _with_native(self, series: Any) -> Self: ...

    def _with_version(self, version: Version) -> Self: ...

    def _to_expr(self) -> CompliantExpr[Any, Self]: ...

    # --- Alternate constructors ---
    @classmethod
    def from_native(
        cls, data: NativeSeriesT, /, *, context: _LimitedContext
    ) -> Self: ...

    @classmethod
    def from_numpy(
        cls, data: Into1DArray, /, *, context: _LimitedContext
    ) -> Self: ...

    @classmethod
    def from_iterable(
        cls,
        data: Iterable[Any],
        /,
        *,
        context: _LimitedContext,
        name: str = "",
        dtype: IntoDType | None = None,
    ) -> Self: ...

    def to_narwhals(self) -> Series[NativeSeriesT]:
        """Wrap this object using `self._version.series(..., level="full")`."""
        return self._version.series(self, level="full")

    # Operators
    def __add__(self, other: Any) -> Self: ...

    def __and__(self, other: Any) -> Self: ...

    def __eq__(self, other: object) -> Self: ...  # type: ignore[override]

    def __floordiv__(self, other: Any) -> Self: ...

    def __ge__(self, other: Any) -> Self: ...

    def __gt__(self, other: Any) -> Self: ...

    def __invert__(self) -> Self: ...

    def __le__(self, other: Any) -> Self: ...

    def __lt__(self, other: Any) -> Self: ...

    def __mod__(self, other: Any) -> Self: ...

    def __mul__(self, other: Any) -> Self: ...

    def __ne__(self, other: object) -> Self: ...  # type: ignore[override]

    def __or__(self, other: Any) -> Self: ...

    def __pow__(self, other: Any) -> Self: ...

    def __radd__(self, other: Any) -> Self: ...

    def __rand__(self, other: Any) -> Self: ...

    def __rfloordiv__(self, other: Any) -> Self: ...

    def __rmod__(self, other: Any) -> Self: ...

    def __rmul__(self, other: Any) -> Self: ...

    def __ror__(self, other: Any) -> Self: ...

    def __rpow__(self, other: Any) -> Self: ...

    def __rsub__(self, other: Any) -> Self: ...

    def __rtruediv__(self, other: Any) -> Self: ...

    def __sub__(self, other: Any) -> Self: ...

    def __truediv__(self, other: Any) -> Self: ...

    # --- Named methods ---
    def abs(self) -> Self: ...

    def alias(self, name: str) -> Self: ...

    def all(self) -> bool: ...

    def any(self) -> bool: ...

    def arg_max(self) -> int: ...

    def arg_min(self) -> int: ...

    def arg_true(self) -> Self: ...

    def cast(self, dtype: IntoDType) -> Self: ...

    def clip(
        self,
        lower_bound: Self | NumericLiteral | TemporalLiteral | None,
        upper_bound: Self | NumericLiteral | TemporalLiteral | None,
    ) -> Self: ...

    def count(self) -> int: ...

    def cum_count(self, *, reverse: bool) -> Self: ...

    def cum_max(self, *, reverse: bool) -> Self: ...

    def cum_min(self, *, reverse: bool) -> Self: ...

    def cum_prod(self, *, reverse: bool) -> Self: ...

    def cum_sum(self, *, reverse: bool) -> Self: ...

    def diff(self) -> Self: ...

    def drop_nulls(self) -> Self: ...

    def ewm_mean(
        self,
        *,
        com: float | None,
        span: float | None,
        half_life: float | None,
        alpha: float | None,
        adjust: bool,
        min_samples: int,
        ignore_nulls: bool,
    ) -> Self: ...

    def exp(self) -> Self: ...

    def sqrt(self) -> Self: ...

    def fill_null(
        self,
        value: Self | NonNestedLiteral,
        strategy: FillNullStrategy | None,
        limit: int | None,
    ) -> Self: ...

    def filter(self, predicate: Any) -> Self: ...

    def gather_every(self, n: int, offset: int) -> Self: ...

    def head(self, n: int) -> Self: ...

    def is_between(
        self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval
    ) -> Self: ...

    def is_finite(self) -> Self: ...

    def is_first_distinct(self) -> Self: ...

    def is_in(self, other: Any) -> Self: ...

    def is_last_distinct(self) -> Self: ...

    def is_nan(self) -> Self: ...

    def is_null(self) -> Self: ...

    def is_sorted(self, *, descending: bool) -> bool: ...

    def is_unique(self) -> Self: ...

    def item(self, index: int | None) -> Any: ...

    def kurtosis(self) -> float | None: ...

    def len(self) -> int: ...

    def log(self, base: float) -> Self: ...

    def max(self) -> Any: ...

    def mean(self) -> float: ...

    def median(self) -> float: ...

    def min(self) -> Any: ...

    def mode(self) -> Self: ...

    def n_unique(self) -> int: ...

    def null_count(self) -> int: ...

    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> float: ...

    def rank(self, method: RankMethod, *, descending: bool) -> Self: ...

    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> Self: ...

    def rolling_mean(
        self, window_size: int, *, min_samples: int, center: bool
    ) -> Self: ...

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self: ...

    def rolling_sum(
        self, window_size: int, *, min_samples: int, center: bool
    ) -> Self: ...

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self: ...

    def round(self, decimals: int) -> Self: ...

    def sample(
        self,
        n: int | None,
        *,
        fraction: float | None,
        with_replacement: bool,
        seed: int | None,
    ) -> Self: ...

    def scatter(self, indices: int | Sequence[int], values: Any) -> Self: ...

    def shift(self, n: int) -> Self: ...

    def skew(self) -> float | None: ...

    def sort(self, *, descending: bool, nulls_last: bool) -> Self: ...

    def std(self, *, ddof: int) -> float: ...

    def sum(self) -> float: ...

    def tail(self, n: int) -> Self: ...

    def to_arrow(self) -> pa.Array[Any]: ...

    def to_dummies(
        self, *, separator: str, drop_first: bool
    ) -> CompliantDataFrame[Self, Any, Any, Any]: ...

    def to_frame(self) -> CompliantDataFrame[Self, Any, Any, Any]: ...

    def to_list(self) -> list[Any]: ...

    def to_pandas(self) -> pd.Series[Any]: ...

    def to_polars(self) -> pl.Series: ...

    def unique(self, *, maintain_order: bool) -> Self: ...

    def value_counts(
        self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
    ) -> CompliantDataFrame[Self, Any, Any, Any]: ...

    def var(self, *, ddof: int) -> float: ...

    def zip_with(self, mask: Any, other: Any) -> Self: ...

    # --- Histogram entry points (decorated with `@unstable`) ---
    @unstable
    def hist_from_bins(
        self, bins: list[float], *, include_breakpoint: bool
    ) -> CompliantDataFrame[Self, Any, Any, Any]:
        """`Series.hist(bins=..., bin_count=None)`."""

    @unstable
    def hist_from_bin_count(
        self, bin_count: int, *, include_breakpoint: bool
    ) -> CompliantDataFrame[Self, Any, Any, Any]:
        """`Series.hist(bins=None, bin_count=...)`."""

    # --- Accessor namespaces (typed as `Any` at this level) ---
    @property
    def str(self) -> Any: ...

    @property
    def dt(self) -> Any: ...

    @property
    def cat(self) -> Any: ...

    @property
    def list(self) -> Any: ...

    @property
    def struct(self) -> Any: ...
|
|
|
|
|
|
class EagerSeries(CompliantSeries[NativeSeriesT], Protocol[NativeSeriesT]):
    """`CompliantSeries` refinement adding broadcast state and gather-based indexing.

    Narrows the accessor properties (`str`, `dt`, `cat`, `list`, `struct`) to
    the `EagerSeries*Namespace` protocols and implements `__getitem__` on top
    of `_gather` / `_gather_slice`.
    """

    _native_series: Any
    _implementation: Implementation
    _version: Version
    _broadcast: bool

    @property
    def _backend_version(self) -> tuple[int, ...]:
        """Return the backend version reported by `self._implementation`."""
        return self._implementation._backend_version()

    @classmethod
    def _align_full_broadcast(cls, *series: Self) -> Sequence[Self]:
        """Ensure all of `series` have the same length (and index if `pandas`).

        Scalars get broadcasted to the full length of the longest Series.

        This is useful when you need to construct a full Series anyway, such as:

            DataFrame.select(...)

        It should not be used in binary operations, such as:

            nw.col("a") - nw.col("a").mean()

        because then it's more efficient to extract the right-hand-side's single
        element as a scalar.
        """
        ...

    def _from_scalar(self, value: Any) -> Self:
        """Build a one-element series from `value`, reusing this series' name."""
        return self.from_iterable([value], name=self.name, context=self)

    def _with_native(
        self, series: NativeSeriesT, *, preserve_broadcast: bool = False
    ) -> Self:
        """Return a new `CompliantSeries`, wrapping the native `series`.

        Pass `preserve_broadcast=True` only when the operation is known not to
        affect whether the result should be broadcast.

        Set this with care - it should only be set for unary expressions which
        don't change length or order, such as `.alias` or `.fill_null`. If in
        doubt, don't set it, you probably don't need it.
        """
        ...

    def __narwhals_namespace__(
        self,
    ) -> EagerNamespace[Any, Self, Any, Any, NativeSeriesT]: ...

    def _to_expr(self) -> EagerExpr[Any, Any]:
        """Turn this series into an expression via the namespace's `_expr._from_series`."""
        namespace = self.__narwhals_namespace__()
        return namespace._expr._from_series(self)  # type: ignore[no-any-return]

    def _gather(self, rows: SizedMultiIndexSelector[NativeSeriesT]) -> Self: ...

    def _gather_slice(self, rows: _SliceIndex | range) -> Self: ...

    def __getitem__(self, item: MultiIndexSelector[Self]) -> Self:
        """Select rows, dispatching on the kind of `item`.

        Slices and ranges go to `_gather_slice`; a compliant series is
        unwrapped to its native object before `_gather`; any other sized
        selector is passed to `_gather` as-is.
        """
        if isinstance(item, (slice, range)):
            return self._gather_slice(item)
        if is_compliant_series(item):
            return self._gather(item.native)
        if is_sized_multi_index_selector(item):
            return self._gather(item)
        # Exhaustiveness guard: every supported selector kind was handled above.
        assert_never(item)

    @property
    def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT]: ...

    @property
    def dt(self) -> EagerSeriesDateTimeNamespace[Self, NativeSeriesT]: ...

    @property
    def cat(self) -> EagerSeriesCatNamespace[Self, NativeSeriesT]: ...

    @property
    def list(self) -> EagerSeriesListNamespace[Self, NativeSeriesT]: ...

    @property
    def struct(self) -> EagerSeriesStructNamespace[Self, NativeSeriesT]: ...
|
|
|
|
|
|
class _SeriesNamespace(  # type: ignore[misc]
    _StoresCompliant[CompliantSeriesT_co],
    _StoresNative[NativeSeriesT_co],
    Protocol[CompliantSeriesT_co, NativeSeriesT_co],
):
    """Base protocol for accessor namespaces that hold a compliant series.

    Exposes the stored series (`compliant`), its native object, and the
    implementation / version details read from it.
    """

    _compliant_series: CompliantSeriesT_co

    @property
    def compliant(self) -> CompliantSeriesT_co:
        """The stored compliant series."""
        return self._compliant_series

    @property
    def implementation(self) -> Implementation:
        """The `_implementation` of the stored series."""
        return self.compliant._implementation

    @property
    def backend_version(self) -> tuple[int, ...]:
        """The backend version reported by `implementation`."""
        return self.implementation._backend_version()

    @property
    def version(self) -> Version:
        """The `_version` of the stored series."""
        return self.compliant._version

    @property
    def native(self) -> NativeSeriesT_co:
        """The native object wrapped by the stored series."""
        return self._compliant_series.native  # type: ignore[no-any-return]

    def with_native(self, series: Any, /) -> CompliantSeriesT_co:
        """Re-wrap `series` by delegating to the stored series' `_with_native`."""
        return self.compliant._with_native(series)
|
|
|
|
|
|
class EagerSeriesNamespace(
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    Generic[EagerSeriesT_co, NativeSeriesT_co],
):
    """Concrete `_SeriesNamespace` base that stores the series given at construction."""

    _compliant_series: EagerSeriesT_co

    def __init__(self, series: EagerSeriesT_co, /) -> None:
        # Positional-only: the series is kept as-is for the inherited accessors.
        self._compliant_series = series
|
|
|
|
|
|
class EagerSeriesCatNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    CatNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining `_SeriesNamespace` with the `CatNamespace` interface."""
|
|
|
|
|
|
class EagerSeriesDateTimeNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    DateTimeNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining `_SeriesNamespace` with the `DateTimeNamespace` interface."""
|
|
|
|
|
|
class EagerSeriesListNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    ListNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining `_SeriesNamespace` with the `ListNamespace` interface."""
|
|
|
|
|
|
class EagerSeriesStringNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    StringNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining `_SeriesNamespace` with the `StringNamespace` interface."""
|
|
|
|
|
|
class EagerSeriesStructNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    StructNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining `_SeriesNamespace` with the `StructNamespace` interface."""
|
|
|
|
|
|
class EagerSeriesHist(Protocol[NativeSeriesT, _CountsT_co]):
    """Protocol for computing histogram data from an `EagerSeries`.

    `with_bins` / `with_bin_count` fill `_data` and return `self`. The
    stub methods (`to_frame`, `_linear_space`, `is_empty_series`,
    `series_empty`, `_calculate_bins`, `_calculate_hist`) are left to
    implementers.
    """

    _series: EagerSeries[NativeSeriesT]
    _breakpoint: bool
    _data: HistData[NativeSeriesT, _CountsT_co]

    @property
    def native(self) -> NativeSeriesT:
        """The native object of the series being binned."""
        return self._series.native

    @classmethod
    def from_series(
        cls, series: EagerSeries[NativeSeriesT], *, include_breakpoint: bool
    ) -> Self:
        """Create an instance bound to `series`; `_data` is not set here."""
        instance = cls.__new__(cls)
        instance._series = series
        instance._breakpoint = include_breakpoint
        return instance

    def to_frame(self) -> EagerDataFrameAny: ...

    def _linear_space(  # NOTE: Roughly `pl.linear_space`
        self,
        start: float,
        end: float,
        num_samples: int,
        *,
        closed: Literal["both", "none"] = "both",
    ) -> _1DArray: ...

    # NOTE: *Could* be handled at narwhals-level
    def is_empty_series(self) -> bool: ...

    # NOTE: **Should** be handled at narwhals-level
    def data_empty(self) -> HistData[NativeSeriesT, _CountsT_co]:
        """Return empty histogram data, with a `breakpoint` key only if requested."""
        if self._breakpoint:
            return {"breakpoint": [], "count": []}
        return {"count": []}

    # NOTE: *Could* be handled at narwhals-level, **iff** we add `nw.repeat`, `nw.linear_space`
    # See https://github.com/narwhals-dev/narwhals/pull/2839#discussion_r2215630696
    def series_empty(
        self, arg: int | list[float], /
    ) -> HistData[NativeSeriesT, _CountsT_co]: ...

    def with_bins(self, bins: list[float], /) -> Self:
        """Populate `_data` from explicit bin edges and return `self`.

        One or zero edges yield `data_empty()`; an empty series yields
        `series_empty(bins)`; otherwise `_calculate_hist(bins)` is used.
        """
        if len(bins) <= 1:
            data = self.data_empty()
        elif self.is_empty_series():
            data = self.series_empty(bins)
        else:
            data = self._calculate_hist(bins)
        self._data = data
        return self

    def with_bin_count(self, bin_count: int, /) -> Self:
        """Populate `_data` from a number of bins and return `self`.

        A count of zero yields `data_empty()`; an empty series yields
        `series_empty(bin_count)`; otherwise the edges come from
        `_calculate_bins(bin_count)` and are passed to `_calculate_hist`.
        """
        if bin_count == 0:
            data = self.data_empty()
        elif self.is_empty_series():
            data = self.series_empty(bin_count)
        else:
            data = self._calculate_hist(self._calculate_bins(bin_count))
        self._data = data
        return self

    def _calculate_breakpoint(self, arg: int | list[float], /) -> list[float] | _1DArray:
        """Return the bin edges without the first one.

        An integer `arg` is expanded to `arg + 1` edges via
        `_linear_space(0, 1, arg + 1)`; a list is used as given.
        """
        if isinstance(arg, int):
            return self._linear_space(0, 1, arg + 1)[1:]
        return arg[1:]

    def _calculate_bins(self, bin_count: int) -> _1DArray: ...

    def _calculate_hist(
        self, bins: list[float] | _1DArray
    ) -> HistData[NativeSeriesT, _CountsT_co]: ...
|