team-10/venv/Lib/site-packages/narwhals/_compliant/series.py

517 lines
17 KiB
Python
Raw Normal View History

2025-08-02 02:00:33 +02:00
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Generic, Literal, Protocol
from narwhals._compliant.any_namespace import (
CatNamespace,
DateTimeNamespace,
ListNamespace,
StringNamespace,
StructNamespace,
)
from narwhals._compliant.typing import (
CompliantSeriesT_co,
EagerDataFrameAny,
EagerSeriesT_co,
NativeSeriesT,
NativeSeriesT_co,
)
from narwhals._translate import FromIterable, FromNative, NumpyConvertible, ToNarwhals
from narwhals._typing_compat import TypeVar, assert_never
from narwhals._utils import (
_StoresCompliant,
_StoresNative,
is_compliant_series,
is_sized_multi_index_selector,
unstable,
)
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping, Sequence
from types import ModuleType
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import NotRequired, Self, TypedDict
from narwhals._compliant.dataframe import CompliantDataFrame
from narwhals._compliant.expr import CompliantExpr, EagerExpr
from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace
from narwhals._utils import Implementation, Version, _LimitedContext
from narwhals.dtypes import DType
from narwhals.series import Series
from narwhals.typing import (
ClosedInterval,
FillNullStrategy,
Into1DArray,
IntoDType,
MultiIndexSelector,
NonNestedLiteral,
NumericLiteral,
RankMethod,
RollingInterpolationMethod,
SizedMultiIndexSelector,
TemporalLiteral,
_1DArray,
_SliceIndex,
)
# NOTE(review): `TypedDict` and `NotRequired` are only imported in the
# `TYPE_CHECKING` section above, so this class presumably belongs inside that
# guard - confirm once the file's indentation is restored.
class HistData(TypedDict, Generic[NativeSeriesT, "_CountsT_co"]):
    """Typed mapping holding histogram results.

    Generic in the native series type and in the type used for counts.
    `_CountsT_co` is referenced by name (as a string) because the type
    variable is only defined after this class.
    """

    # Optional key (`NotRequired`): the breakpoints, as a list or 1D array.
    breakpoint: NotRequired[list[float] | _1DArray | list[Any]]
    # Always-present key: the counts, in any of the accepted container types.
    count: NativeSeriesT | _1DArray | _CountsT_co | list[Any]
# Covariant type variable for the counts container used by `HistData` and
# `EagerSeriesHist`; bounded by `Iterable[Any]` (given as a forward reference).
_CountsT_co = TypeVar("_CountsT_co", bound="Iterable[Any]", covariant=True)

# Public names exported by this module (kept in alphabetical order).
__all__ = [
    "CompliantSeries",
    "EagerSeries",
    "EagerSeriesCatNamespace",
    "EagerSeriesDateTimeNamespace",
    "EagerSeriesHist",
    "EagerSeriesListNamespace",
    "EagerSeriesNamespace",
    "EagerSeriesStringNamespace",
    "EagerSeriesStructNamespace",
]
class CompliantSeries(
    NumpyConvertible["_1DArray", "Into1DArray"],
    FromIterable,
    FromNative[NativeSeriesT],
    ToNarwhals["Series[NativeSeriesT]"],
    Protocol[NativeSeriesT],
):
    """Structural interface (a `Protocol`) for a series wrapper around a native series.

    The protocol is generic in `NativeSeriesT`, the wrapped native series type,
    and combines the `NumpyConvertible`, `FromIterable`, `FromNative` and
    `ToNarwhals` protocols.

    Almost every member is a signature-only stub (`...`). Only three members
    carry a default body here:

    - `__narwhals_series__` returns `self`.
    - `__len__` returns `len(self.native)`.
    - `to_narwhals` wraps `self` via `self._version.series(self, level="full")`.
    """

    _implementation: Implementation
    _version: Version

    @property
    def dtype(self) -> DType: ...
    @property
    def name(self) -> str: ...
    @property
    def native(self) -> NativeSeriesT: ...
    def __narwhals_series__(self) -> Self:
        # Default implementation: the object itself is the compliant series.
        return self

    def __narwhals_namespace__(self) -> CompliantNamespace[Any, Any]: ...
    def __native_namespace__(self) -> ModuleType: ...
    def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray: ...
    def __contains__(self, other: Any) -> bool: ...
    def __getitem__(self, item: MultiIndexSelector[Self]) -> Any: ...
    def __iter__(self) -> Iterator[Any]: ...
    def __len__(self) -> int:
        # Default implementation: delegate to the wrapped native series.
        return len(self.native)

    def _with_native(self, series: Any) -> Self: ...
    def _with_version(self, version: Version) -> Self: ...
    def _to_expr(self) -> CompliantExpr[Any, Self]: ...

    # Alternate constructors; each takes a keyword-only `context`.
    @classmethod
    def from_native(cls, data: NativeSeriesT, /, *, context: _LimitedContext) -> Self: ...
    @classmethod
    def from_numpy(cls, data: Into1DArray, /, *, context: _LimitedContext) -> Self: ...
    @classmethod
    def from_iterable(
        cls,
        data: Iterable[Any],
        /,
        *,
        context: _LimitedContext,
        name: str = "",
        dtype: IntoDType | None = None,
    ) -> Self: ...
    def to_narwhals(self) -> Series[NativeSeriesT]:
        # Default implementation: build the narwhals-level `Series` through
        # the stored version object, always at level "full".
        return self._version.series(self, level="full")

    # Operators
    # `__eq__` / `__ne__` are annotated to return `Self` rather than `bool`,
    # which is why they carry an `override` type-ignore.
    def __add__(self, other: Any) -> Self: ...
    def __and__(self, other: Any) -> Self: ...
    def __eq__(self, other: object) -> Self: ...  # type: ignore[override]
    def __floordiv__(self, other: Any) -> Self: ...
    def __ge__(self, other: Any) -> Self: ...
    def __gt__(self, other: Any) -> Self: ...
    def __invert__(self) -> Self: ...
    def __le__(self, other: Any) -> Self: ...
    def __lt__(self, other: Any) -> Self: ...
    def __mod__(self, other: Any) -> Self: ...
    def __mul__(self, other: Any) -> Self: ...
    def __ne__(self, other: object) -> Self: ...  # type: ignore[override]
    def __or__(self, other: Any) -> Self: ...
    def __pow__(self, other: Any) -> Self: ...
    def __radd__(self, other: Any) -> Self: ...
    def __rand__(self, other: Any) -> Self: ...
    def __rfloordiv__(self, other: Any) -> Self: ...
    def __rmod__(self, other: Any) -> Self: ...
    def __rmul__(self, other: Any) -> Self: ...
    def __ror__(self, other: Any) -> Self: ...
    def __rpow__(self, other: Any) -> Self: ...
    def __rsub__(self, other: Any) -> Self: ...
    def __rtruediv__(self, other: Any) -> Self: ...
    def __sub__(self, other: Any) -> Self: ...
    def __truediv__(self, other: Any) -> Self: ...

    # Series methods (signature-only; behaviour is supplied by implementers).
    def abs(self) -> Self: ...
    def alias(self, name: str) -> Self: ...
    def all(self) -> bool: ...
    def any(self) -> bool: ...
    def arg_max(self) -> int: ...
    def arg_min(self) -> int: ...
    def arg_true(self) -> Self: ...
    def cast(self, dtype: IntoDType) -> Self: ...
    def clip(
        self,
        lower_bound: Self | NumericLiteral | TemporalLiteral | None,
        upper_bound: Self | NumericLiteral | TemporalLiteral | None,
    ) -> Self: ...
    def count(self) -> int: ...
    def cum_count(self, *, reverse: bool) -> Self: ...
    def cum_max(self, *, reverse: bool) -> Self: ...
    def cum_min(self, *, reverse: bool) -> Self: ...
    def cum_prod(self, *, reverse: bool) -> Self: ...
    def cum_sum(self, *, reverse: bool) -> Self: ...
    def diff(self) -> Self: ...
    def drop_nulls(self) -> Self: ...
    def ewm_mean(
        self,
        *,
        com: float | None,
        span: float | None,
        half_life: float | None,
        alpha: float | None,
        adjust: bool,
        min_samples: int,
        ignore_nulls: bool,
    ) -> Self: ...
    def exp(self) -> Self: ...
    def sqrt(self) -> Self: ...
    def fill_null(
        self,
        value: Self | NonNestedLiteral,
        strategy: FillNullStrategy | None,
        limit: int | None,
    ) -> Self: ...
    def filter(self, predicate: Any) -> Self: ...
    def gather_every(self, n: int, offset: int) -> Self: ...
    def head(self, n: int) -> Self: ...
    def is_between(
        self, lower_bound: Any, upper_bound: Any, closed: ClosedInterval
    ) -> Self: ...
    def is_finite(self) -> Self: ...
    def is_first_distinct(self) -> Self: ...
    def is_in(self, other: Any) -> Self: ...
    def is_last_distinct(self) -> Self: ...
    def is_nan(self) -> Self: ...
    def is_null(self) -> Self: ...
    def is_sorted(self, *, descending: bool) -> bool: ...
    def is_unique(self) -> Self: ...
    def item(self, index: int | None) -> Any: ...
    def kurtosis(self) -> float | None: ...
    def len(self) -> int: ...
    def log(self, base: float) -> Self: ...
    def max(self) -> Any: ...
    def mean(self) -> float: ...
    def median(self) -> float: ...
    def min(self) -> Any: ...
    def mode(self) -> Self: ...
    def n_unique(self) -> int: ...
    def null_count(self) -> int: ...
    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> float: ...
    def rank(self, method: RankMethod, *, descending: bool) -> Self: ...
    def replace_strict(
        self,
        old: Sequence[Any] | Mapping[Any, Any],
        new: Sequence[Any],
        *,
        return_dtype: IntoDType | None,
    ) -> Self: ...
    def rolling_mean(
        self, window_size: int, *, min_samples: int, center: bool
    ) -> Self: ...
    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self: ...
    def rolling_sum(
        self, window_size: int, *, min_samples: int, center: bool
    ) -> Self: ...
    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self: ...
    def round(self, decimals: int) -> Self: ...
    def sample(
        self,
        n: int | None,
        *,
        fraction: float | None,
        with_replacement: bool,
        seed: int | None,
    ) -> Self: ...
    def scatter(self, indices: int | Sequence[int], values: Any) -> Self: ...
    def shift(self, n: int) -> Self: ...
    def skew(self) -> float | None: ...
    def sort(self, *, descending: bool, nulls_last: bool) -> Self: ...
    def std(self, *, ddof: int) -> float: ...
    def sum(self) -> float: ...
    def tail(self, n: int) -> Self: ...
    def to_arrow(self) -> pa.Array[Any]: ...
    def to_dummies(
        self, *, separator: str, drop_first: bool
    ) -> CompliantDataFrame[Self, Any, Any, Any]: ...
    def to_frame(self) -> CompliantDataFrame[Self, Any, Any, Any]: ...
    def to_list(self) -> list[Any]: ...
    def to_pandas(self) -> pd.Series[Any]: ...
    def to_polars(self) -> pl.Series: ...
    def unique(self, *, maintain_order: bool) -> Self: ...
    def value_counts(
        self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
    ) -> CompliantDataFrame[Self, Any, Any, Any]: ...
    def var(self, *, ddof: int) -> float: ...
    def zip_with(self, mask: Any, other: Any) -> Self: ...

    # Histogram entry points; both are marked `@unstable`.
    @unstable
    def hist_from_bins(
        self, bins: list[float], *, include_breakpoint: bool
    ) -> CompliantDataFrame[Self, Any, Any, Any]:
        """`Series.hist(bins=..., bin_count=None)`."""
        ...

    @unstable
    def hist_from_bin_count(
        self, bin_count: int, *, include_breakpoint: bool
    ) -> CompliantDataFrame[Self, Any, Any, Any]:
        """`Series.hist(bins=None, bin_count=...)`."""
        ...

    # Namespace accessors; typed as `Any` at this level of the hierarchy.
    @property
    def str(self) -> Any: ...
    @property
    def dt(self) -> Any: ...
    @property
    def cat(self) -> Any: ...
    @property
    def list(self) -> Any: ...
    @property
    def struct(self) -> Any: ...
class EagerSeries(CompliantSeries[NativeSeriesT], Protocol[NativeSeriesT]):
    """`CompliantSeries` refinement for eager backends.

    Adds broadcast bookkeeping (`_broadcast`), row-gathering hooks
    (`_gather`, `_gather_slice`) and a default `__getitem__` built on them,
    and narrows the namespace accessors to their eager-specific types.
    """

    _native_series: Any
    _implementation: Implementation
    _version: Version
    _broadcast: bool

    @property
    def _backend_version(self) -> tuple[int, ...]:
        """Backend version tuple, as reported by the stored implementation."""
        implementation = self._implementation
        return implementation._backend_version()

    @classmethod
    def _align_full_broadcast(cls, *series: Self) -> Sequence[Self]:
        """Make every element of `series` the same length (and index, for `pandas`).

        Scalars are broadcast up to the length of the longest Series.

        Use this when a full Series has to be materialised anyway, e.g.:

            DataFrame.select(...)

        Do not use it for binary operations, e.g.:

            nw.col("a") - nw.col("a").mean()

        since there it is more efficient to pull the right-hand side's single
        element out as a scalar.
        """
        ...

    def _from_scalar(self, value: Any) -> Self:
        """Build a one-element series from `value`, reusing this series' name and context."""
        single_item = [value]
        return self.from_iterable(single_item, name=self.name, context=self)

    def _with_native(
        self, series: NativeSeriesT, *, preserve_broadcast: bool = False
    ) -> Self:
        """Wrap the native `series` in a new `CompliantSeries`.

        Pass `preserve_broadcast=True` only when the operation is known not to
        change whether the result should be broadcast.

        Be careful with it: it is meant solely for unary expressions that keep
        both length and order, such as `.alias` or `.fill_null`. When unsure,
        leave it unset - it is most likely not needed.
        """
        ...

    def __narwhals_namespace__(
        self,
    ) -> EagerNamespace[Any, Self, Any, Any, NativeSeriesT]: ...

    def _to_expr(self) -> EagerExpr[Any, Any]:
        """Turn this series into an expression via the namespace's `_expr._from_series`."""
        namespace = self.__narwhals_namespace__()
        return namespace._expr._from_series(self)  # type: ignore[no-any-return]

    def _gather(self, rows: SizedMultiIndexSelector[NativeSeriesT]) -> Self: ...

    def _gather_slice(self, rows: _SliceIndex | range) -> Self: ...

    def __getitem__(self, item: MultiIndexSelector[Self]) -> Self:
        """Select rows, dispatching on the kind of selector.

        - `slice` / `range` goes to `_gather_slice`.
        - A compliant series goes to `_gather` with its native series.
        - Any other sized multi-index selector goes to `_gather` as-is.

        Anything else reaches `assert_never`.
        """
        if isinstance(item, (slice, range)):
            return self._gather_slice(item)
        if is_compliant_series(item):
            return self._gather(item.native)
        if is_sized_multi_index_selector(item):
            return self._gather(item)
        assert_never(item)

    @property
    def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT]: ...

    @property
    def dt(self) -> EagerSeriesDateTimeNamespace[Self, NativeSeriesT]: ...

    @property
    def cat(self) -> EagerSeriesCatNamespace[Self, NativeSeriesT]: ...

    @property
    def list(self) -> EagerSeriesListNamespace[Self, NativeSeriesT]: ...

    @property
    def struct(self) -> EagerSeriesStructNamespace[Self, NativeSeriesT]: ...
class _SeriesNamespace(  # type: ignore[misc]
    _StoresCompliant[CompliantSeriesT_co],
    _StoresNative[NativeSeriesT_co],
    Protocol[CompliantSeriesT_co, NativeSeriesT_co],
):
    """Shared base protocol for series accessor namespaces.

    Holds a compliant series in `_compliant_series` and exposes it, together
    with its implementation, backend version, version and native series,
    through read-only properties.
    """

    _compliant_series: CompliantSeriesT_co

    @property
    def compliant(self) -> CompliantSeriesT_co:
        """The stored compliant series."""
        return self._compliant_series

    @property
    def implementation(self) -> Implementation:
        """The `_implementation` of the stored compliant series."""
        return self.compliant._implementation

    @property
    def backend_version(self) -> tuple[int, ...]:
        """Backend version, obtained from `implementation._backend_version()`."""
        return self.implementation._backend_version()

    @property
    def version(self) -> Version:
        """The `_version` of the stored compliant series."""
        return self.compliant._version

    @property
    def native(self) -> NativeSeriesT_co:
        """The native series wrapped by the stored compliant series."""
        return self._compliant_series.native  # type: ignore[no-any-return]

    def with_native(self, series: Any, /) -> CompliantSeriesT_co:
        """Return the result of the compliant series' `_with_native(series)`."""
        return self.compliant._with_native(series)
class EagerSeriesNamespace(
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    Generic[EagerSeriesT_co, NativeSeriesT_co],
):
    """`_SeriesNamespace` base declared with `Generic` (not `Protocol`) that provides `__init__`."""

    _compliant_series: EagerSeriesT_co

    def __init__(self, series: EagerSeriesT_co, /) -> None:
        """Store `series` (positional-only) as the compliant series of this namespace."""
        self._compliant_series = series
# Type returned by `EagerSeries.cat`: combines the `_SeriesNamespace`
# accessors with the `CatNamespace` interface; declares no members of its own.
class EagerSeriesCatNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    CatNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
): ...
# Type returned by `EagerSeries.dt`: combines the `_SeriesNamespace`
# accessors with the `DateTimeNamespace` interface; declares no members of its own.
class EagerSeriesDateTimeNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    DateTimeNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
): ...
# Type returned by `EagerSeries.list`: combines the `_SeriesNamespace`
# accessors with the `ListNamespace` interface; declares no members of its own.
class EagerSeriesListNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    ListNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
): ...
# Type returned by `EagerSeries.str`: combines the `_SeriesNamespace`
# accessors with the `StringNamespace` interface; declares no members of its own.
class EagerSeriesStringNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    StringNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
): ...
# Type returned by `EagerSeries.struct`: combines the `_SeriesNamespace`
# accessors with the `StructNamespace` interface; declares no members of its own.
class EagerSeriesStructNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    StructNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
): ...
class EagerSeriesHist(Protocol[NativeSeriesT, _CountsT_co]):
    """Protocol for the helper that assembles histogram data for an eager series.

    Build one with `from_series`, populate `_data` through `with_bins` or
    `with_bin_count` (both return `self`), then call `to_frame`.
    Backend-specific pieces are left as stubs for implementers.
    """

    _series: EagerSeries[NativeSeriesT]
    _breakpoint: bool
    _data: HistData[NativeSeriesT, _CountsT_co]

    @property
    def native(self) -> NativeSeriesT:
        """Native series of the wrapped eager series."""
        wrapped = self._series
        return wrapped.native

    @classmethod
    def from_series(
        cls, series: EagerSeries[NativeSeriesT], *, include_breakpoint: bool
    ) -> Self:
        """Create an instance without calling `__init__` and attach its inputs.

        `_data` is not set here; it is filled in by `with_bins` or
        `with_bin_count`.
        """
        instance = cls.__new__(cls)
        instance._series = series
        instance._breakpoint = include_breakpoint
        return instance

    def to_frame(self) -> EagerDataFrameAny: ...

    def _linear_space(  # NOTE: Roughly `pl.linear_space`
        self,
        start: float,
        end: float,
        num_samples: int,
        *,
        closed: Literal["both", "none"] = "both",
    ) -> _1DArray: ...

    # NOTE: *Could* be handled at narwhals-level
    def is_empty_series(self) -> bool: ...

    # NOTE: **Should** be handled at narwhals-level
    def data_empty(self) -> HistData[NativeSeriesT, _CountsT_co]:
        """Return empty histogram data, with a "breakpoint" key only if breakpoints were requested."""
        if self._breakpoint:
            return {"breakpoint": [], "count": []}
        return {"count": []}

    # NOTE: *Could* be handled at narwhals-level, **iff** we add `nw.repeat`, `nw.linear_space`
    # See https://github.com/narwhals-dev/narwhals/pull/2839#discussion_r2215630696
    def series_empty(
        self, arg: int | list[float], /
    ) -> HistData[NativeSeriesT, _CountsT_co]: ...

    def with_bins(self, bins: list[float], /) -> Self:
        """Populate `_data` from explicit bin edges and return `self`.

        At most one edge yields empty data; otherwise an empty series is
        delegated to `series_empty`, and anything else to `_calculate_hist`.
        """
        if len(bins) <= 1:
            data = self.data_empty()
        elif self.is_empty_series():
            data = self.series_empty(bins)
        else:
            data = self._calculate_hist(bins)
        self._data = data
        return self

    def with_bin_count(self, bin_count: int, /) -> Self:
        """Populate `_data` from a number of bins and return `self`.

        A count of zero yields empty data; otherwise an empty series is
        delegated to `series_empty`, and anything else computes the edges
        with `_calculate_bins` before calling `_calculate_hist`.
        """
        if bin_count == 0:
            data = self.data_empty()
        elif self.is_empty_series():
            data = self.series_empty(bin_count)
        else:
            edges = self._calculate_bins(bin_count)
            data = self._calculate_hist(edges)
        self._data = data
        return self

    def _calculate_breakpoint(self, arg: int | list[float], /) -> list[float] | _1DArray:
        """Return the bin edges without the first one.

        For an `int`, the edges are `arg + 1` samples from
        `_linear_space(0, 1, ...)`; otherwise `arg` itself is taken as the edges.
        """
        if isinstance(arg, int):
            edges = self._linear_space(0, 1, arg + 1)
            return edges[1:]
        return arg[1:]

    def _calculate_bins(self, bin_count: int) -> _1DArray: ...

    def _calculate_hist(
        self, bins: list[float] | _1DArray
    ) -> HistData[NativeSeriesT, _CountsT_co]: ...