from __future__ import annotations from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries if TYPE_CHECKING: import datetime as dt from collections.abc import Iterable, Sequence, Sized from decimal import Decimal from types import ModuleType import numpy as np from typing_extensions import TypeAlias from narwhals import dtypes from narwhals.dataframe import DataFrame, LazyFrame from narwhals.expr import Expr from narwhals.series import Series # All dataframes supported by Narwhals have a # `columns` property. Their similarities don't extend # _that_ much further unfortunately... class NativeFrame(Protocol): @property def columns(self) -> Any: ... def join(self, *args: Any, **kwargs: Any) -> Any: ... class NativeLazyFrame(NativeFrame, Protocol): def explain(self, *args: Any, **kwargs: Any) -> Any: ... class NativeSeries(Sized, Iterable[Any], Protocol): def filter(self, *args: Any, **kwargs: Any) -> Any: ... class DataFrameLike(Protocol): def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ... class SupportsNativeNamespace(Protocol): def __native_namespace__(self) -> ModuleType: ... # ruff: noqa: N802 class DTypes(Protocol): @property def Decimal(self) -> type[dtypes.Decimal]: ... @property def Int128(self) -> type[dtypes.Int128]: ... @property def Int64(self) -> type[dtypes.Int64]: ... @property def Int32(self) -> type[dtypes.Int32]: ... @property def Int16(self) -> type[dtypes.Int16]: ... @property def Int8(self) -> type[dtypes.Int8]: ... @property def UInt128(self) -> type[dtypes.UInt128]: ... @property def UInt64(self) -> type[dtypes.UInt64]: ... @property def UInt32(self) -> type[dtypes.UInt32]: ... @property def UInt16(self) -> type[dtypes.UInt16]: ... @property def UInt8(self) -> type[dtypes.UInt8]: ... @property def Float64(self) -> type[dtypes.Float64]: ... @property def Float32(self) -> type[dtypes.Float32]: ... @property def String(self) -> type[dtypes.String]: ... @property def Boolean(self) -> type[dtypes.Boolean]: ... @property def Object(self) -> type[dtypes.Object]: ... @property def Categorical(self) -> type[dtypes.Categorical]: ... @property def Enum(self) -> type[dtypes.Enum]: ... @property def Datetime(self) -> type[dtypes.Datetime]: ... @property def Duration(self) -> type[dtypes.Duration]: ... @property def Date(self) -> type[dtypes.Date]: ... @property def Field(self) -> type[dtypes.Field]: ... @property def Struct(self) -> type[dtypes.Struct]: ... @property def List(self) -> type[dtypes.List]: ... @property def Array(self) -> type[dtypes.Array]: ... @property def Unknown(self) -> type[dtypes.Unknown]: ... @property def Time(self) -> type[dtypes.Time]: ... @property def Binary(self) -> type[dtypes.Binary]: ... IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"] """Anything which can be converted to an expression. Use this to mean "either a Narwhals expression, or something which can be converted into one". For example, `exprs` in `DataFrame.select` is typed to accept `IntoExpr`, as it can either accept a `nw.Expr` (e.g. `df.select(nw.col('a'))`) or a string which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`. """ IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"] """Anything which can be converted to a Narwhals DataFrame. Use this if your function accepts a narwhalifiable object but doesn't care about its backend. Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoDataFrame >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]: ... df = nw.from_native(df_native, eager_only=True) ... return df.shape """ IntoLazyFrame: TypeAlias = "NativeLazyFrame" IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"] """Anything which can be converted to a Narwhals DataFrame or LazyFrame. Use this if your function can accept an object which can be converted to either `nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend. Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoFrame >>> def agnostic_columns(df_native: IntoFrame) -> list[str]: ... df = nw.from_native(df_native) ... return df.collect_schema().names() """ Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"] """Narwhals DataFrame or Narwhals LazyFrame. Use this if your function can work with either and your function doesn't care about its backend. Examples: >>> import narwhals as nw >>> from narwhals.typing import Frame >>> @nw.narwhalify ... def agnostic_columns(df: Frame) -> list[str]: ... return df.columns """ IntoSeries: TypeAlias = "NativeSeries" """Anything which can be converted to a Narwhals Series. Use this if your function can accept an object which can be converted to `nw.Series` and it doesn't care about its backend. Examples: >>> from typing import Any >>> import narwhals as nw >>> from narwhals.typing import IntoSeries >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]: ... s = nw.from_native(s_native) ... return s.to_list() """ IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame") """TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame. Use this if your function accepts an object which is convertible to `nw.DataFrame` or `nw.LazyFrame` and returns an object of the same type. Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return df.with_columns(c=nw.col("a") + 1).to_native() """ IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame") """TypeVar bound to object convertible to Narwhals DataFrame. Use this if your function accepts an object which can be converted to `nw.DataFrame` and returns an object of the same class. Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoDataFrameT >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT: ... df = nw.from_native(df_native, eager_only=True) ... return df.with_columns(c=df["a"] + 1).to_native() """ IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame") FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]") """TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame. Use this if your function accepts either `nw.DataFrame` or `nw.LazyFrame` and returns an object of the same kind. Examples: >>> import narwhals as nw >>> from narwhals.typing import FrameT >>> @nw.narwhalify ... def agnostic_func(df: FrameT) -> FrameT: ... return df.with_columns(c=nw.col("a") + 1) """ DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]") """TypeVar bound to Narwhals DataFrame. Use this if your function can accept a Narwhals DataFrame and returns a Narwhals DataFrame backed by the same backend. Examples: >>> import narwhals as nw >>> from narwhals.typing import DataFrameT >>> @nw.narwhalify >>> def func(df: DataFrameT) -> DataFrameT: ... return df.with_columns(c=df["a"] + 1) """ LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]") SeriesT = TypeVar("SeriesT", bound="Series[Any]") IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries") """TypeVar bound to object convertible to Narwhals Series. Use this if your function accepts an object which can be converted to `nw.Series` and returns an object of the same class. Examples: >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.abs().to_native() """ DTypeBackend: TypeAlias = 'Literal["pyarrow", "numpy_nullable"] | None' SizeUnit: TypeAlias = Literal[ "b", "kb", "mb", "gb", "tb", "bytes", "kilobytes", "megabytes", "gigabytes", "terabytes", ] TimeUnit: TypeAlias = Literal["ns", "us", "ms", "s"] AsofJoinStrategy: TypeAlias = Literal["backward", "forward", "nearest"] """Join strategy. - *"backward"*: Selects the last row in the right DataFrame whose `on` key is less than or equal to the left's key. - *"forward"*: Selects the first row in the right DataFrame whose `on` key is greater than or equal to the left's key. - *"nearest"*: Search selects the last row in the right DataFrame whose value is nearest to the left's key. """ ClosedInterval: TypeAlias = Literal["left", "right", "none", "both"] """Define which sides of the interval are closed (inclusive).""" ConcatMethod: TypeAlias = Literal["horizontal", "vertical", "diagonal"] """Concatenating strategy. - *"vertical"*: Concatenate vertically. Column names must match. - *"horizontal"*: Concatenate horizontally. If lengths don't match, then missing rows are filled with null values. - *"diagonal"*: Finds a union between the column schemas and fills missing column values with null. """ FillNullStrategy: TypeAlias = Literal["forward", "backward"] """Strategy used to fill null values.""" JoinStrategy: TypeAlias = Literal["inner", "left", "full", "cross", "semi", "anti"] """Join strategy. - *"inner"*: Returns rows that have matching values in both tables. - *"left"*: Returns all rows from the left table, and the matched rows from the right table. - *"full"*: Returns all rows in both dataframes, with the `suffix` appended to the right join keys. - *"cross"*: Returns the Cartesian product of rows from both tables. - *"semi"*: Filter rows that have a match in the right table. - *"anti"*: Filter rows that do not have a match in the right table. """ PivotAgg: TypeAlias = Literal[ "min", "max", "first", "last", "sum", "mean", "median", "len" ] """A predefined aggregate function string.""" RankMethod: TypeAlias = Literal["average", "min", "max", "dense", "ordinal"] """The method used to assign ranks to tied elements. - *"average"*: The average of the ranks that would have been assigned to all the tied values is assigned to each value. - *"min"*: The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.) - *"max"*: The maximum of the ranks that would have been assigned to all the tied values is assigned to each value. - *"dense"*: Like "min", but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements. - *"ordinal"*: All values are given a distinct rank, corresponding to the order that the values occur in the Series. """ RollingInterpolationMethod: TypeAlias = Literal[ "nearest", "higher", "lower", "midpoint", "linear" ] """Interpolation method.""" UniqueKeepStrategy: TypeAlias = Literal["any", "first", "last", "none"] """Which of the duplicate rows to keep. - *"any"*: Does not give any guarantee of which row is kept. This allows more optimizations. - *"none"*: Don't keep duplicate rows. - *"first"*: Keep first unique row. - *"last"*: Keep last unique row. """ LazyUniqueKeepStrategy: TypeAlias = Literal["any", "none"] """Which of the duplicate rows to keep. - *"any"*: Does not give any guarantee of which row is kept. - *"none"*: Don't keep duplicate rows. """ _ShapeT = TypeVar("_ShapeT", bound="tuple[int, ...]") _NDArray: TypeAlias = "np.ndarray[_ShapeT, Any]" _1DArray: TypeAlias = "_NDArray[tuple[int]]" # noqa: PYI042 _1DArrayInt: TypeAlias = "np.ndarray[tuple[int], np.dtype[np.integer[Any]]]" # noqa: PYI042 _2DArray: TypeAlias = "_NDArray[tuple[int, int]]" # noqa: PYI042, PYI047 _AnyDArray: TypeAlias = "_NDArray[tuple[int, ...]]" # noqa: PYI047 _NumpyScalar: TypeAlias = "np.generic[Any]" Into1DArray: TypeAlias = "_1DArray | _NumpyScalar" """A 1-dimensional `numpy.ndarray` or scalar that can be converted into one.""" NumericLiteral: TypeAlias = "int | float | Decimal" TemporalLiteral: TypeAlias = "dt.date | dt.datetime | dt.time | dt.timedelta" NonNestedLiteral: TypeAlias = ( "NumericLiteral | TemporalLiteral | str | bool | bytes | None" ) PythonLiteral: TypeAlias = "NonNestedLiteral | list[Any] | tuple[Any, ...]" NonNestedDType: TypeAlias = "dtypes.NumericType | dtypes.TemporalType | dtypes.String | dtypes.Boolean | dtypes.Binary | dtypes.Categorical | dtypes.Unknown | dtypes.Object" """Any Narwhals DType that does not have required arguments.""" IntoDType: TypeAlias = "dtypes.DType | type[NonNestedDType]" """Anything that can be converted into a Narwhals DType. Examples: >>> import polars as pl >>> import narwhals as nw >>> df_native = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]}) >>> df = nw.from_native(df_native) >>> df.select( ... nw.col("a").cast(nw.Int32), ... nw.col("b").cast(nw.String()).str.split(".").cast(nw.List(nw.Int8)), ... ) ┌──────────────────┐ |Narwhals DataFrame| |------------------| |shape: (3, 2) | |┌─────┬──────────┐| |│ a ┆ b │| |│ --- ┆ --- │| |│ i32 ┆ list[i8] │| |╞═════╪══════════╡| |│ 1 ┆ [4, 0] │| |│ 2 ┆ [5, 0] │| |│ 3 ┆ [6, 0] │| |└─────┴──────────┘| └──────────────────┘ """ # Annotations for `__getitem__` methods _T = TypeVar("_T") _Slice: TypeAlias = "slice[_T, Any, Any] | slice[Any, _T, Any] | slice[None, None, _T]" _SliceNone: TypeAlias = "slice[None, None, None]" # Index/column positions SingleIndexSelector: TypeAlias = int _SliceIndex: TypeAlias = "_Slice[int] | _SliceNone" """E.g. `[1:]` or `[:3]` or `[::2]`.""" SizedMultiIndexSelector: TypeAlias = "Sequence[int] | _T | _1DArrayInt" MultiIndexSelector: TypeAlias = "_SliceIndex | SizedMultiIndexSelector[_T]" # Labels/column names SingleNameSelector: TypeAlias = str _SliceName: TypeAlias = "_Slice[str] | _SliceNone" SizedMultiNameSelector: TypeAlias = "Sequence[str] | _T | _1DArray" MultiNameSelector: TypeAlias = "_SliceName | SizedMultiNameSelector[_T]" # Mixed selectors SingleColSelector: TypeAlias = "SingleIndexSelector | SingleNameSelector" MultiColSelector: TypeAlias = "MultiIndexSelector[_T] | MultiNameSelector[_T]" __all__ = [ "CompliantDataFrame", "CompliantLazyFrame", "CompliantSeries", "DataFrameT", "Frame", "FrameT", "IntoDataFrame", "IntoDataFrameT", "IntoExpr", "IntoFrame", "IntoFrameT", "IntoSeries", "IntoSeriesT", ]