team-10/env/Lib/site-packages/narwhals/typing.py
2025-08-02 07:34:44 +02:00

428 lines
15 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union
from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries
if TYPE_CHECKING:
import datetime as dt
from collections.abc import Iterable, Sequence, Sized
from decimal import Decimal
from types import ModuleType
import numpy as np
from typing_extensions import TypeAlias
from narwhals import dtypes
from narwhals.dataframe import DataFrame, LazyFrame
from narwhals.expr import Expr
from narwhals.series import Series
# All dataframes supported by Narwhals have a
# `columns` property. Their similarities don't extend
# _that_ much further unfortunately...
class NativeFrame(Protocol):
    """Structural type for a backend-native dataframe.

    Any object exposing a `columns` property and a `join` method satisfies
    this protocol; nothing else about the object is assumed.
    """

    @property
    def columns(self) -> Any: ...

    def join(self, *args: Any, **kwargs: Any) -> Any: ...
class NativeLazyFrame(NativeFrame, Protocol):
    """Structural type for a backend-native lazy frame.

    Adds the requirement of an `explain` method on top of everything
    `NativeFrame` already demands.
    """

    def explain(self, *args: Any, **kwargs: Any) -> Any: ...
# NOTE(review): `Sized` and `Iterable` are imported only under `TYPE_CHECKING`
# in this module, so this class presumably lives inside that guard - confirm
# when restoring the file's indentation.
class NativeSeries(Sized, Iterable[Any], Protocol):
    """Structural type for a backend-native series.

    A conforming object is sized, iterable, and exposes a `filter` method.
    """

    def filter(self, *args: Any, **kwargs: Any) -> Any: ...
class DataFrameLike(Protocol):
    """Structural type for any object that implements `__dataframe__`."""

    def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ...
class SupportsNativeNamespace(Protocol):
    """Structural type for objects exposing `__native_namespace__`.

    The method takes no arguments and is annotated as returning a module.
    """

    def __native_namespace__(self) -> ModuleType: ...
# The properties of `DTypes` are deliberately spelled like the dtype classes
# they return, so the lowercase-function-name lint (N802) is silenced.
# ruff: noqa: N802
class DTypes(Protocol):
    """Structural type for a namespace that exposes the Narwhals dtype classes.

    Every member is a read-only property returning the dtype *class* of the
    same name (not an instance of it).
    """

    # Decimal and signed integers.
    @property
    def Decimal(self) -> type[dtypes.Decimal]: ...
    @property
    def Int128(self) -> type[dtypes.Int128]: ...
    @property
    def Int64(self) -> type[dtypes.Int64]: ...
    @property
    def Int32(self) -> type[dtypes.Int32]: ...
    @property
    def Int16(self) -> type[dtypes.Int16]: ...
    @property
    def Int8(self) -> type[dtypes.Int8]: ...

    # Unsigned integers.
    @property
    def UInt128(self) -> type[dtypes.UInt128]: ...
    @property
    def UInt64(self) -> type[dtypes.UInt64]: ...
    @property
    def UInt32(self) -> type[dtypes.UInt32]: ...
    @property
    def UInt16(self) -> type[dtypes.UInt16]: ...
    @property
    def UInt8(self) -> type[dtypes.UInt8]: ...

    # Floating point.
    @property
    def Float64(self) -> type[dtypes.Float64]: ...
    @property
    def Float32(self) -> type[dtypes.Float32]: ...

    # Text, boolean, object and categorical-like types.
    @property
    def String(self) -> type[dtypes.String]: ...
    @property
    def Boolean(self) -> type[dtypes.Boolean]: ...
    @property
    def Object(self) -> type[dtypes.Object]: ...
    @property
    def Categorical(self) -> type[dtypes.Categorical]: ...
    @property
    def Enum(self) -> type[dtypes.Enum]: ...

    # Date/time types.
    @property
    def Datetime(self) -> type[dtypes.Datetime]: ...
    @property
    def Duration(self) -> type[dtypes.Duration]: ...
    @property
    def Date(self) -> type[dtypes.Date]: ...

    # Nested types and their building blocks.
    @property
    def Field(self) -> type[dtypes.Field]: ...
    @property
    def Struct(self) -> type[dtypes.Struct]: ...
    @property
    def List(self) -> type[dtypes.List]: ...
    @property
    def Array(self) -> type[dtypes.Array]: ...

    # Remaining types.
    @property
    def Unknown(self) -> type[dtypes.Unknown]: ...
    @property
    def Time(self) -> type[dtypes.Time]: ...
    @property
    def Binary(self) -> type[dtypes.Binary]: ...
IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"]
"""Anything which can be converted to an expression.
Use this to mean "either a Narwhals expression, or something which can be converted
into one". For example, `exprs` in `DataFrame.select` is typed to accept `IntoExpr`,
as it can either accept a `nw.Expr` (e.g. `df.select(nw.col('a'))`) or a string
which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`.
"""
IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"]
"""Anything which can be converted to a Narwhals DataFrame.
Use this if your function accepts a narwhalifiable object but doesn't care about its backend.
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoDataFrame
>>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
... df = nw.from_native(df_native, eager_only=True)
... return df.shape
"""
IntoLazyFrame: TypeAlias = "NativeLazyFrame"
IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"]
"""Anything which can be converted to a Narwhals DataFrame or LazyFrame.
Use this if your function can accept an object which can be converted to either
`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend.
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrame
>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
... df = nw.from_native(df_native)
... return df.collect_schema().names()
"""
Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"]
"""Narwhals DataFrame or Narwhals LazyFrame.
Use this if your function can work with either and your function doesn't care
about its backend.
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import Frame
>>> @nw.narwhalify
... def agnostic_columns(df: Frame) -> list[str]:
... return df.columns
"""
IntoSeries: TypeAlias = "NativeSeries"
"""Anything which can be converted to a Narwhals Series.
Use this if your function can accept an object which can be converted to `nw.Series`
and it doesn't care about its backend.
Examples:
>>> from typing import Any
>>> import narwhals as nw
>>> from narwhals.typing import IntoSeries
>>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]:
... s = nw.from_native(s_native)
... return s.to_list()
"""
IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame")
"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function accepts an object which is convertible to `nw.DataFrame`
or `nw.LazyFrame` and returns an object of the same type.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoFrameT
    >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT:
    ...     df = nw.from_native(df_native)
    ...     return df.with_columns(c=nw.col("a") + 1).to_native()
"""

IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame")
"""TypeVar bound to object convertible to Narwhals DataFrame.

Use this if your function accepts an object which can be converted to `nw.DataFrame`
and returns an object of the same class.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoDataFrameT
    >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT:
    ...     df = nw.from_native(df_native, eager_only=True)
    ...     return df.with_columns(c=df["a"] + 1).to_native()
"""

# TypeVar bound to `IntoLazyFrame`.
IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame")

FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]")
"""TypeVar constrained to Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function accepts either `nw.DataFrame` or `nw.LazyFrame` and returns
an object of the same kind.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import FrameT
    >>> @nw.narwhalify
    ... def agnostic_func(df: FrameT) -> FrameT:
    ...     return df.with_columns(c=nw.col("a") + 1)
"""

DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]")
"""TypeVar bound to Narwhals DataFrame.

Use this if your function can accept a Narwhals DataFrame and returns a Narwhals
DataFrame backed by the same backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import DataFrameT
    >>> @nw.narwhalify
    ... def func(df: DataFrameT) -> DataFrameT:
    ...     return df.with_columns(c=df["a"] + 1)
"""

# TypeVar bound to a Narwhals `LazyFrame`.
LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]")

# TypeVar bound to a Narwhals `Series`.
SeriesT = TypeVar("SeriesT", bound="Series[Any]")

IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries")
"""TypeVar bound to object convertible to Narwhals Series.

Use this if your function accepts an object which can be converted to `nw.Series`
and returns an object of the same class.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoSeriesT
    >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT:
    ...     s = nw.from_native(s_native, series_only=True)
    ...     return s.abs().to_native()
"""
DTypeBackend: TypeAlias = 'Literal["pyarrow", "numpy_nullable"] | None'
SizeUnit: TypeAlias = Literal[
"b",
"kb",
"mb",
"gb",
"tb",
"bytes",
"kilobytes",
"megabytes",
"gigabytes",
"terabytes",
]
TimeUnit: TypeAlias = Literal["ns", "us", "ms", "s"]
AsofJoinStrategy: TypeAlias = Literal["backward", "forward", "nearest"]
"""Join strategy.
- *"backward"*: Selects the last row in the right DataFrame whose `on` key
is less than or equal to the left's key.
- *"forward"*: Selects the first row in the right DataFrame whose `on` key
is greater than or equal to the left's key.
- *"nearest"*: Search selects the last row in the right DataFrame whose value
is nearest to the left's key.
"""
ClosedInterval: TypeAlias = Literal["left", "right", "none", "both"]
"""Define which sides of the interval are closed (inclusive)."""
ConcatMethod: TypeAlias = Literal["horizontal", "vertical", "diagonal"]
"""Concatenating strategy.
- *"vertical"*: Concatenate vertically. Column names must match.
- *"horizontal"*: Concatenate horizontally. If lengths don't match, then
missing rows are filled with null values.
- *"diagonal"*: Finds a union between the column schemas and fills missing
column values with null.
"""
FillNullStrategy: TypeAlias = Literal["forward", "backward"]
"""Strategy used to fill null values."""
JoinStrategy: TypeAlias = Literal["inner", "left", "full", "cross", "semi", "anti"]
"""Join strategy.
- *"inner"*: Returns rows that have matching values in both tables.
- *"left"*: Returns all rows from the left table, and the matched rows from
the right table.
- *"full"*: Returns all rows in both dataframes, with the `suffix` appended to
the right join keys.
- *"cross"*: Returns the Cartesian product of rows from both tables.
- *"semi"*: Filter rows that have a match in the right table.
- *"anti"*: Filter rows that do not have a match in the right table.
"""
PivotAgg: TypeAlias = Literal[
"min", "max", "first", "last", "sum", "mean", "median", "len"
]
"""A predefined aggregate function string."""
RankMethod: TypeAlias = Literal["average", "min", "max", "dense", "ordinal"]
"""The method used to assign ranks to tied elements.
- *"average"*: The average of the ranks that would have been assigned to
all the tied values is assigned to each value.
- *"min"*: The minimum of the ranks that would have been assigned to all
the tied values is assigned to each value. (This is also referred to
as "competition" ranking.)
- *"max"*: The maximum of the ranks that would have been assigned to all
the tied values is assigned to each value.
- *"dense"*: Like "min", but the rank of the next highest element is
assigned the rank immediately after those assigned to the tied elements.
- *"ordinal"*: All values are given a distinct rank, corresponding to the
order that the values occur in the Series.
"""
RollingInterpolationMethod: TypeAlias = Literal[
"nearest", "higher", "lower", "midpoint", "linear"
]
"""Interpolation method."""
UniqueKeepStrategy: TypeAlias = Literal["any", "first", "last", "none"]
"""Which of the duplicate rows to keep.
- *"any"*: Does not give any guarantee of which row is kept.
This allows more optimizations.
- *"none"*: Don't keep duplicate rows.
- *"first"*: Keep first unique row.
- *"last"*: Keep last unique row.
"""
LazyUniqueKeepStrategy: TypeAlias = Literal["any", "none"]
"""Which of the duplicate rows to keep.
- *"any"*: Does not give any guarantee of which row is kept.
- *"none"*: Don't keep duplicate rows.
"""
_ShapeT = TypeVar("_ShapeT", bound="tuple[int, ...]")
_NDArray: TypeAlias = "np.ndarray[_ShapeT, Any]"
_1DArray: TypeAlias = "_NDArray[tuple[int]]" # noqa: PYI042
_1DArrayInt: TypeAlias = "np.ndarray[tuple[int], np.dtype[np.integer[Any]]]" # noqa: PYI042
_2DArray: TypeAlias = "_NDArray[tuple[int, int]]" # noqa: PYI042, PYI047
_AnyDArray: TypeAlias = "_NDArray[tuple[int, ...]]" # noqa: PYI047
_NumpyScalar: TypeAlias = "np.generic[Any]"
Into1DArray: TypeAlias = "_1DArray | _NumpyScalar"
"""A 1-dimensional `numpy.ndarray` or scalar that can be converted into one."""
NumericLiteral: TypeAlias = "int | float | Decimal"
TemporalLiteral: TypeAlias = "dt.date | dt.datetime | dt.time | dt.timedelta"
NonNestedLiteral: TypeAlias = (
"NumericLiteral | TemporalLiteral | str | bool | bytes | None"
)
PythonLiteral: TypeAlias = "NonNestedLiteral | list[Any] | tuple[Any, ...]"
NonNestedDType: TypeAlias = "dtypes.NumericType | dtypes.TemporalType | dtypes.String | dtypes.Boolean | dtypes.Binary | dtypes.Categorical | dtypes.Unknown | dtypes.Object"
"""Any Narwhals DType that does not have required arguments."""
IntoDType: TypeAlias = "dtypes.DType | type[NonNestedDType]"
"""Anything that can be converted into a Narwhals DType.
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
>>> df = nw.from_native(df_native)
>>> df.select(
... nw.col("a").cast(nw.Int32),
... nw.col("b").cast(nw.String()).str.split(".").cast(nw.List(nw.Int8)),
... )
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|shape: (3, 2) |
|┌─────┬──────────┐|
|│ a ┆ b │|
|│ --- ┆ --- │|
|│ i32 ┆ list[i8] │|
|╞═════╪══════════╡|
|│ 1 ┆ [4, 0] │|
|│ 2 ┆ [5, 0] │|
|│ 3 ┆ [6, 0] │|
|└─────┴──────────┘|
└──────────────────┘
"""
# Annotations for `__getitem__` methods
# `_T` is the placeholder type the selector aliases below are written in terms of.
_T = TypeVar("_T")
# A slice whose start is `_T`, or whose stop is `_T`, or which has only a `_T` step.
_Slice: TypeAlias = "slice[_T, Any, Any] | slice[Any, _T, Any] | slice[None, None, _T]"
# A slice whose start, stop and step are all `None`.
_SliceNone: TypeAlias = "slice[None, None, None]"
# Index/column positions
# A single position is a plain `int`.
SingleIndexSelector: TypeAlias = int
_SliceIndex: TypeAlias = "_Slice[int] | _SliceNone"
"""E.g. `[1:]` or `[:3]` or `[::2]`."""
# Several positions at once: a sequence of ints, a `_T`, or a 1-D integer array.
SizedMultiIndexSelector: TypeAlias = "Sequence[int] | _T | _1DArrayInt"
# Either an integer slice or any of the sized multi-position forms.
MultiIndexSelector: TypeAlias = "_SliceIndex | SizedMultiIndexSelector[_T]"
# Labels/column names
# A single label is a plain `str`.
SingleNameSelector: TypeAlias = str
# String-keyed counterpart of `_SliceIndex`.
_SliceName: TypeAlias = "_Slice[str] | _SliceNone"
# Several labels at once: a sequence of strings, a `_T`, or a 1-D array.
SizedMultiNameSelector: TypeAlias = "Sequence[str] | _T | _1DArray"
# Either a label slice or any of the sized multi-label forms.
MultiNameSelector: TypeAlias = "_SliceName | SizedMultiNameSelector[_T]"
# Mixed selectors
# One column, addressed by position or by name.
SingleColSelector: TypeAlias = "SingleIndexSelector | SingleNameSelector"
# Several columns, addressed by positions or by names.
MultiColSelector: TypeAlias = "MultiIndexSelector[_T] | MultiNameSelector[_T]"
# Names exported by `from narwhals.typing import *`. The three `Compliant*`
# entries are re-exports from `narwhals._compliant`; the rest are aliases and
# TypeVars defined in this module. Aliases not listed here (for example
# `IntoLazyFrame` or `SeriesT`) are not part of the star-import surface.
__all__ = [
    "CompliantDataFrame",
    "CompliantLazyFrame",
    "CompliantSeries",
    "DataFrameT",
    "Frame",
    "FrameT",
    "IntoDataFrame",
    "IntoDataFrameT",
    "IntoExpr",
    "IntoFrame",
    "IntoFrameT",
    "IntoSeries",
    "IntoSeriesT",
]