team-10/env/Lib/site-packages/narwhals/stable/v2/__init__.py
2025-08-02 07:34:44 +02:00

1257 lines
37 KiB
Python

from __future__ import annotations
from functools import wraps
from typing import TYPE_CHECKING, Any, Callable, Literal, cast, overload
import narwhals as nw
from narwhals import exceptions, functions as nw_f
from narwhals._typing_compat import TypeVar, assert_never
from narwhals._utils import (
Implementation,
Version,
generate_temporary_column_name,
inherit_doc,
is_ordered_categorical,
maybe_align_index,
maybe_convert_dtypes,
maybe_get_index,
maybe_reset_index,
maybe_set_index,
not_implemented,
)
from narwhals.dataframe import DataFrame as NwDataFrame, LazyFrame as NwLazyFrame
from narwhals.dtypes import (
Array,
Binary,
Boolean,
Categorical,
Date,
Datetime,
Decimal,
Duration,
Enum,
Field,
Float32,
Float64,
Int8,
Int16,
Int32,
Int64,
Int128,
List,
Object,
String,
Struct,
Time,
UInt8,
UInt16,
UInt32,
UInt64,
UInt128,
Unknown,
)
from narwhals.expr import Expr as NwExpr
from narwhals.functions import _new_series_impl, concat, show_versions
from narwhals.schema import Schema as NwSchema
from narwhals.series import Series as NwSeries
from narwhals.stable.v2 import dependencies, dtypes, selectors
from narwhals.translate import _from_native_impl, get_native_namespace, to_py_scalar
from narwhals.typing import IntoDataFrameT, IntoFrameT
if TYPE_CHECKING:
from collections.abc import Iterable, Mapping, Sequence
from types import ModuleType
from typing_extensions import ParamSpec, Self
from narwhals._translate import IntoArrowTable
from narwhals.dataframe import MultiColSelector, MultiIndexSelector
from narwhals.dtypes import DType
from narwhals.typing import (
IntoDType,
IntoExpr,
IntoFrame,
IntoSeries,
NonNestedLiteral,
SingleColSelector,
SingleIndexSelector,
_1DArray,
_2DArray,
)
DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]")
LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]")
SeriesT = TypeVar("SeriesT", bound="Series[Any]")
T = TypeVar("T", default=Any)
P = ParamSpec("P")
R = TypeVar("R")
IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries", default=Any)
class DataFrame(NwDataFrame[IntoDataFrameT]):
@inherit_doc(NwDataFrame)
def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
assert df._version is Version.V2 # noqa: S101
super().__init__(df, level=level)
# We need to override any method which don't return Self so that type
# annotations are correct.
@property
def _series(self) -> type[Series[Any]]:
return cast("type[Series[Any]]", Series)
@property
def _lazyframe(self) -> type[LazyFrame[Any]]:
return cast("type[LazyFrame[Any]]", LazyFrame)
@overload
def __getitem__(self, item: tuple[SingleIndexSelector, SingleColSelector]) -> Any: ...
@overload
def __getitem__( # type: ignore[overload-overlap]
self, item: str | tuple[MultiIndexSelector, SingleColSelector]
) -> Series[Any]: ...
@overload
def __getitem__(
self,
item: (
SingleIndexSelector
| MultiIndexSelector
| MultiColSelector
| tuple[SingleIndexSelector, MultiColSelector]
| tuple[MultiIndexSelector, MultiColSelector]
),
) -> Self: ...
def __getitem__(
self,
item: (
SingleIndexSelector
| SingleColSelector
| MultiColSelector
| MultiIndexSelector
| tuple[SingleIndexSelector, SingleColSelector]
| tuple[SingleIndexSelector, MultiColSelector]
| tuple[MultiIndexSelector, SingleColSelector]
| tuple[MultiIndexSelector, MultiColSelector]
),
) -> Series[Any] | Self | Any:
return super().__getitem__(item)
def get_column(self, name: str) -> Series:
# Type checkers complain that `nw.Series` is not assignable to `nw.v2.stable.Series`.
# However the return type actually is `nw.v2.stable.Series`, check `tests/v2_test.py`.
return super().get_column(name) # type: ignore[return-value]
def lazy(
self, backend: ModuleType | Implementation | str | None = None
) -> LazyFrame[Any]:
return _stableify(super().lazy(backend=backend))
@overload # type: ignore[override]
def to_dict(self, *, as_series: Literal[True] = ...) -> dict[str, Series[Any]]: ...
@overload
def to_dict(self, *, as_series: Literal[False]) -> dict[str, list[Any]]: ...
@overload
def to_dict(
self, *, as_series: bool
) -> dict[str, Series[Any]] | dict[str, list[Any]]: ...
def to_dict(
self, *, as_series: bool = True
) -> dict[str, Series[Any]] | dict[str, list[Any]]:
# Type checkers complain that `nw.Series` is not assignable to `nw.v2.stable.Series`.
# However the return type actually is `nw.v2.stable.Series`, check `tests/v2_test.py::test_to_dict_as_series`.
return super().to_dict(as_series=as_series) # type: ignore[return-value]
def is_duplicated(self) -> Series[Any]:
return _stableify(super().is_duplicated())
def is_unique(self) -> Series[Any]:
return _stableify(super().is_unique())
class LazyFrame(NwLazyFrame[IntoFrameT]):
@inherit_doc(NwLazyFrame)
def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None:
assert df._version is Version.V2 # noqa: S101
super().__init__(df, level=level)
@property
def _dataframe(self) -> type[DataFrame[Any]]:
return DataFrame
def collect(
self, backend: ModuleType | Implementation | str | None = None, **kwargs: Any
) -> DataFrame[Any]:
return _stableify(super().collect(backend=backend, **kwargs))
class Series(NwSeries[IntoSeriesT]):
@inherit_doc(NwSeries)
def __init__(
self, series: Any, *, level: Literal["full", "lazy", "interchange"]
) -> None:
assert series._version is Version.V2 # noqa: S101
super().__init__(series, level=level)
# We need to override any method which don't return Self so that type
# annotations are correct.
@property
def _dataframe(self) -> type[DataFrame[Any]]:
return DataFrame
def to_frame(self) -> DataFrame[Any]:
return _stableify(super().to_frame())
def value_counts(
self,
*,
sort: bool = False,
parallel: bool = False,
name: str | None = None,
normalize: bool = False,
) -> DataFrame[Any]:
return _stableify(
super().value_counts(
sort=sort, parallel=parallel, name=name, normalize=normalize
)
)
# Too unstable to consider including here.
hist: Any = not_implemented()
class Expr(NwExpr): ...
class Schema(NwSchema):
_version = Version.V2
@inherit_doc(NwSchema)
def __init__(
self, schema: Mapping[str, DType] | Iterable[tuple[str, DType]] | None = None
) -> None:
super().__init__(schema)
@overload
def _stableify(obj: NwDataFrame[IntoFrameT]) -> DataFrame[IntoFrameT]: ...
@overload
def _stableify(obj: NwLazyFrame[IntoFrameT]) -> LazyFrame[IntoFrameT]: ...
@overload
def _stableify(obj: NwSeries[IntoSeriesT]) -> Series[IntoSeriesT]: ...
@overload
def _stableify(obj: NwExpr) -> Expr: ...
def _stableify(
obj: NwDataFrame[IntoFrameT]
| NwLazyFrame[IntoFrameT]
| NwSeries[IntoSeriesT]
| NwExpr,
) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT] | Expr:
if isinstance(obj, NwDataFrame):
return DataFrame(obj._compliant_frame._with_version(Version.V2), level=obj._level)
if isinstance(obj, NwLazyFrame):
return LazyFrame(obj._compliant_frame._with_version(Version.V2), level=obj._level)
if isinstance(obj, NwSeries):
return Series(obj._compliant_series._with_version(Version.V2), level=obj._level)
if isinstance(obj, NwExpr):
return Expr(obj._to_compliant_expr, obj._metadata)
assert_never(obj)
@overload
def from_native(native_object: SeriesT, **kwds: Any) -> SeriesT: ...
@overload
def from_native(native_object: DataFrameT, **kwds: Any) -> DataFrameT: ...
@overload
def from_native(native_object: LazyFrameT, **kwds: Any) -> LazyFrameT: ...
@overload
def from_native(
native_object: DataFrameT | LazyFrameT, **kwds: Any
) -> DataFrameT | LazyFrameT: ...
@overload
def from_native(
native_object: IntoDataFrameT | IntoSeries,
*,
pass_through: Literal[True],
eager_only: Literal[False] = ...,
series_only: Literal[False] = ...,
allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT]: ...
@overload
def from_native(
native_object: IntoDataFrameT | IntoSeriesT,
*,
pass_through: Literal[True],
eager_only: Literal[True],
series_only: Literal[False] = ...,
allow_series: Literal[True],
) -> DataFrame[IntoDataFrameT] | Series[IntoSeriesT]: ...
@overload
def from_native(
native_object: IntoDataFrameT,
*,
pass_through: Literal[True],
eager_only: Literal[False] = ...,
series_only: Literal[False] = ...,
allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...
@overload
def from_native(
native_object: T,
*,
pass_through: Literal[True],
eager_only: Literal[False] = ...,
series_only: Literal[False] = ...,
allow_series: None = ...,
) -> T: ...
@overload
def from_native(
native_object: IntoDataFrameT,
*,
pass_through: Literal[True],
eager_only: Literal[True],
series_only: Literal[False] = ...,
allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...
@overload
def from_native(
native_object: T,
*,
pass_through: Literal[True],
eager_only: Literal[True],
series_only: Literal[False] = ...,
allow_series: None = ...,
) -> T: ...
@overload
def from_native(
native_object: IntoSeriesT,
*,
pass_through: Literal[True],
eager_only: Literal[False] = ...,
series_only: Literal[True],
allow_series: None = ...,
) -> Series[IntoSeriesT]: ...
@overload
def from_native(
native_object: IntoDataFrameT,
*,
pass_through: Literal[False] = ...,
eager_only: Literal[False] = ...,
series_only: Literal[False] = ...,
allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...
@overload
def from_native(
native_object: IntoDataFrameT,
*,
pass_through: Literal[False] = ...,
eager_only: Literal[True],
series_only: Literal[False] = ...,
allow_series: None = ...,
) -> DataFrame[IntoDataFrameT]: ...
@overload
def from_native(
native_object: IntoFrame | IntoSeries,
*,
pass_through: Literal[False] = ...,
eager_only: Literal[False] = ...,
series_only: Literal[False] = ...,
allow_series: Literal[True],
) -> DataFrame[Any] | LazyFrame[Any] | Series[Any]: ...
@overload
def from_native(
native_object: IntoSeriesT,
*,
pass_through: Literal[False] = ...,
eager_only: Literal[False] = ...,
series_only: Literal[True],
allow_series: None = ...,
) -> Series[IntoSeriesT]: ...
# All params passed in as variables
@overload
def from_native(
native_object: Any,
*,
pass_through: bool,
eager_only: bool,
series_only: bool,
allow_series: bool | None,
) -> Any: ...
def from_native( # noqa: D417
native_object: IntoFrameT | IntoFrame | IntoSeriesT | IntoSeries | T,
*,
pass_through: bool = False,
eager_only: bool = False,
series_only: bool = False,
allow_series: bool | None = None,
**kwds: Any,
) -> LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T:
"""Convert `native_object` to Narwhals Dataframe, Lazyframe, or Series.
Arguments:
native_object: Raw object from user.
Depending on the other arguments, input object can be
- a Dataframe / Lazyframe / Series supported by Narwhals (pandas, Polars, PyArrow, ...)
- an object which implements `__narwhals_dataframe__`, `__narwhals_lazyframe__`,
or `__narwhals_series__`
pass_through: Determine what happens if the object can't be converted to Narwhals
- `False` (default): raise an error
- `True`: pass object through as-is
eager_only: Whether to only allow eager objects
- `False` (default): don't require `native_object` to be eager
- `True`: only convert to Narwhals if `native_object` is eager
series_only: Whether to only allow Series
- `False` (default): don't require `native_object` to be a Series
- `True`: only convert to Narwhals if `native_object` is a Series
allow_series: Whether to allow Series (default is only Dataframe / Lazyframe)
- `False` or `None` (default): don't convert to Narwhals if `native_object` is a Series
- `True`: allow `native_object` to be a Series
Returns:
DataFrame, LazyFrame, Series, or original object, depending
on which combination of parameters was passed.
"""
# Early returns
if isinstance(native_object, (DataFrame, LazyFrame)) and not series_only:
return native_object
if isinstance(native_object, Series) and (series_only or allow_series):
return native_object
if kwds:
msg = f"from_native() got an unexpected keyword argument {next(iter(kwds))!r}"
raise TypeError(msg)
return _from_native_impl( # type: ignore[no-any-return]
native_object,
pass_through=pass_through,
eager_only=eager_only,
series_only=series_only,
allow_series=allow_series,
version=Version.V2,
)
@overload
def to_native(
narwhals_object: DataFrame[IntoDataFrameT], *, pass_through: Literal[False] = ...
) -> IntoDataFrameT: ...
@overload
def to_native(
narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ...
) -> IntoFrameT: ...
@overload
def to_native(
narwhals_object: Series[IntoSeriesT], *, pass_through: Literal[False] = ...
) -> IntoSeriesT: ...
@overload
def to_native(narwhals_object: Any, *, pass_through: bool) -> Any: ...
def to_native(
narwhals_object: DataFrame[IntoDataFrameT]
| LazyFrame[IntoFrameT]
| Series[IntoSeriesT],
*,
pass_through: bool = False,
) -> IntoFrameT | IntoSeriesT | Any:
"""Convert Narwhals object to native one.
Arguments:
narwhals_object: Narwhals object.
pass_through: Determine what happens if `narwhals_object` isn't a Narwhals class
- `False` (default): raise an error
- `True`: pass object through as-is
Returns:
Object of class that user started with.
"""
return nw.to_native(narwhals_object, pass_through=pass_through)
def narwhalify(
func: Callable[..., Any] | None = None,
*,
pass_through: bool = True,
eager_only: bool = False,
series_only: bool = False,
allow_series: bool | None = True,
) -> Callable[..., Any]:
"""Decorate function so it becomes dataframe-agnostic.
This will try to convert any dataframe/series-like object into the Narwhals
respective DataFrame/Series, while leaving the other parameters as they are.
Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be
converted back to the original dataframe/series type, while if the output is another
type it will be left as is.
By setting `pass_through=False`, then every input and every output will be required to be a
dataframe/series-like object.
Arguments:
func: Function to wrap in a `from_native`-`to_native` block.
pass_through: Determine what happens if the object can't be converted to Narwhals
- `False`: raise an error
- `True` (default): pass object through as-is
eager_only: Whether to only allow eager objects
- `False` (default): don't require `native_object` to be eager
- `True`: only convert to Narwhals if `native_object` is eager
series_only: Whether to only allow Series
- `False` (default): don't require `native_object` to be a Series
- `True`: only convert to Narwhals if `native_object` is a Series
allow_series: Whether to allow Series (default is only Dataframe / Lazyframe)
- `False` or `None`: don't convert to Narwhals if `native_object` is a Series
- `True` (default): allow `native_object` to be a Series
Returns:
Decorated function.
"""
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
@wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> Any:
args = [
from_native(
arg,
pass_through=pass_through,
eager_only=eager_only,
series_only=series_only,
allow_series=allow_series,
)
for arg in args
] # type: ignore[assignment]
kwargs = {
name: from_native(
value,
pass_through=pass_through,
eager_only=eager_only,
series_only=series_only,
allow_series=allow_series,
)
for name, value in kwargs.items()
}
backends = {
b()
for v in (*args, *kwargs.values())
if (b := getattr(v, "__native_namespace__", None))
}
if backends.__len__() > 1:
msg = "Found multiple backends. Make sure that all dataframe/series inputs come from the same backend."
raise ValueError(msg)
result = func(*args, **kwargs)
return to_native(result, pass_through=pass_through)
return wrapper
if func is None:
return decorator
else:
# If func is not None, it means the decorator is used without arguments
return decorator(func)
def all() -> Expr:
"""Instantiate an expression representing all columns.
Returns:
A new expression.
"""
return _stableify(nw.all())
def col(*names: str | Iterable[str]) -> Expr:
"""Creates an expression that references one or more columns by their name(s).
Arguments:
names: Name(s) of the columns to use.
Returns:
A new expression.
"""
return _stableify(nw.col(*names))
def exclude(*names: str | Iterable[str]) -> Expr:
"""Creates an expression that excludes columns by their name(s).
Arguments:
names: Name(s) of the columns to exclude.
Returns:
A new expression.
"""
return _stableify(nw.exclude(*names))
def nth(*indices: int | Sequence[int]) -> Expr:
"""Creates an expression that references one or more columns by their index(es).
Notes:
`nth` is not supported for Polars version<1.0.0. Please use
[`narwhals.col`][] instead.
Arguments:
indices: One or more indices representing the columns to retrieve.
Returns:
A new expression.
"""
return _stableify(nw.nth(*indices))
def len() -> Expr:
"""Return the number of rows.
Returns:
A new expression.
"""
return _stableify(nw.len())
def lit(value: NonNestedLiteral, dtype: IntoDType | None = None) -> Expr:
"""Return an expression representing a literal value.
Arguments:
value: The value to use as literal.
dtype: The data type of the literal value. If not provided, the data type will
be inferred by the native library.
Returns:
A new expression.
"""
return _stableify(nw.lit(value, dtype))
def min(*columns: str) -> Expr:
"""Return the minimum value.
Note:
Syntactic sugar for ``nw.col(columns).min()``.
Arguments:
columns: Name(s) of the columns to use in the aggregation function.
Returns:
A new expression.
"""
return _stableify(nw.min(*columns))
def max(*columns: str) -> Expr:
"""Return the maximum value.
Note:
Syntactic sugar for ``nw.col(columns).max()``.
Arguments:
columns: Name(s) of the columns to use in the aggregation function.
Returns:
A new expression.
"""
return _stableify(nw.max(*columns))
def mean(*columns: str) -> Expr:
"""Get the mean value.
Note:
Syntactic sugar for ``nw.col(columns).mean()``
Arguments:
columns: Name(s) of the columns to use in the aggregation function
Returns:
A new expression.
"""
return _stableify(nw.mean(*columns))
def median(*columns: str) -> Expr:
"""Get the median value.
Notes:
- Syntactic sugar for ``nw.col(columns).median()``
- Results might slightly differ across backends due to differences in the
underlying algorithms used to compute the median.
Arguments:
columns: Name(s) of the columns to use in the aggregation function
Returns:
A new expression.
"""
return _stableify(nw.median(*columns))
def sum(*columns: str) -> Expr:
"""Sum all values.
Note:
Syntactic sugar for ``nw.col(columns).sum()``
Arguments:
columns: Name(s) of the columns to use in the aggregation function
Returns:
A new expression.
"""
return _stableify(nw.sum(*columns))
def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
"""Sum all values horizontally across columns.
Warning:
Unlike Polars, we support horizontal sum over numeric columns only.
Arguments:
exprs: Name(s) of the columns to use in the aggregation function. Accepts
expression input.
Returns:
A new expression.
"""
return _stableify(nw.sum_horizontal(*exprs))
def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr], ignore_nulls: bool) -> Expr:
r"""Compute the bitwise AND horizontally across columns.
Arguments:
exprs: Name(s) of the columns to use in the aggregation function. Accepts
expression input.
ignore_nulls: Whether to ignore nulls:
- If `True`, null values are ignored. If there are no elements, the result
is `True`.
- If `False`, Kleene logic is followed. Note that this is not allowed for
pandas with classical NumPy dtypes when null values are present.
Returns:
A new expression.
"""
return _stableify(nw.all_horizontal(*exprs, ignore_nulls=ignore_nulls))
def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr], ignore_nulls: bool) -> Expr:
r"""Compute the bitwise OR horizontally across columns.
Arguments:
exprs: Name(s) of the columns to use in the aggregation function. Accepts
expression input.
ignore_nulls: Whether to ignore nulls:
- If `True`, null values are ignored. If there are no elements, the result
is `False`.
- If `False`, Kleene logic is followed. Note that this is not allowed for
pandas with classical NumPy dtypes when null values are present.
Returns:
A new expression.
"""
return _stableify(nw.any_horizontal(*exprs, ignore_nulls=ignore_nulls))
def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
"""Compute the mean of all values horizontally across columns.
Arguments:
exprs: Name(s) of the columns to use in the aggregation function. Accepts
expression input.
Returns:
A new expression.
"""
return _stableify(nw.mean_horizontal(*exprs))
def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
"""Get the minimum value horizontally across columns.
Notes:
We support `min_horizontal` over numeric columns only.
Arguments:
exprs: Name(s) of the columns to use in the aggregation function. Accepts
expression input.
Returns:
A new expression.
"""
return _stableify(nw.min_horizontal(*exprs))
def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr:
"""Get the maximum value horizontally across columns.
Notes:
We support `max_horizontal` over numeric columns only.
Arguments:
exprs: Name(s) of the columns to use in the aggregation function. Accepts
expression input.
Returns:
A new expression.
"""
return _stableify(nw.max_horizontal(*exprs))
def concat_str(
exprs: IntoExpr | Iterable[IntoExpr],
*more_exprs: IntoExpr,
separator: str = "",
ignore_nulls: bool = False,
) -> Expr:
r"""Horizontally concatenate columns into a single string column.
Arguments:
exprs: Columns to concatenate into a single string column. Accepts expression
input. Strings are parsed as column names, other non-expression inputs are
parsed as literals. Non-`String` columns are cast to `String`.
*more_exprs: Additional columns to concatenate into a single string column,
specified as positional arguments.
separator: String that will be used to separate the values of each column.
ignore_nulls: Ignore null values (default is `False`).
If set to `False`, null values will be propagated and if the row contains any
null values, the output is null.
Returns:
A new expression.
"""
return _stableify(
nw.concat_str(exprs, *more_exprs, separator=separator, ignore_nulls=ignore_nulls)
)
def coalesce(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
"""Folds the columns from left to right, keeping the first non-null value.
Arguments:
exprs: Columns to coalesce, must be a str, nw.Expr, or nw.Series
where strings are parsed as column names and both nw.Expr/nw.Series
are passed through as-is. Scalar values must be wrapped in `nw.lit`.
*more_exprs: Additional columns to coalesce, specified as positional arguments.
Raises:
TypeError: If any of the inputs are not a str, nw.Expr, or nw.Series.
Returns:
A new expression.
"""
return _stableify(nw.coalesce(exprs, *more_exprs))
class When(nw_f.When):
@classmethod
def from_when(cls, when: nw_f.When) -> When:
return cls(when._predicate)
def then(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Then:
return Then.from_then(super().then(value))
class Then(nw_f.Then, Expr):
@classmethod
def from_then(cls, then: nw_f.Then) -> Then:
return cls(then._to_compliant_expr, then._metadata)
def otherwise(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Expr:
return _stableify(super().otherwise(value))
def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When:
"""Start a `when-then-otherwise` expression.
Expression similar to an `if-else` statement in Python. Always initiated by a
`pl.when(<condition>).then(<value if condition>)`, and optionally followed by a
`.otherwise(<value if condition is false>)` can be appended at the end. If not
appended, and the condition is not `True`, `None` will be returned.
Info:
Chaining multiple `.when(<condition>).then(<value>)` statements is currently
not supported.
See [Narwhals#668](https://github.com/narwhals-dev/narwhals/issues/668).
Arguments:
predicates: Condition(s) that must be met in order to apply the subsequent
statement. Accepts one or more boolean expressions, which are implicitly
combined with `&`. String input is parsed as a column name.
Returns:
A "when" object, which `.then` can be called on.
"""
return When.from_when(nw_f.when(*predicates))
def new_series(
name: str,
values: Any,
dtype: IntoDType | None = None,
*,
backend: ModuleType | Implementation | str,
) -> Series[Any]:
"""Instantiate Narwhals Series from iterable (e.g. list or array).
Arguments:
name: Name of resulting Series.
values: Values of make Series from.
dtype: (Narwhals) dtype. If not provided, the native library
may auto-infer it from `values`.
backend: specifies which eager backend instantiate to.
`backend` can be specified in various ways
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
Returns:
A new Series
"""
return _stableify(_new_series_impl(name, values, dtype, backend=backend))
def from_arrow(
native_frame: IntoArrowTable, *, backend: ModuleType | Implementation | str
) -> DataFrame[Any]:
"""Construct a DataFrame from an object which supports the PyCapsule Interface.
Arguments:
native_frame: Object which implements `__arrow_c_stream__`.
backend: specifies which eager backend instantiate to.
`backend` can be specified in various ways
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
Returns:
A new DataFrame.
"""
return _stableify(nw_f.from_arrow(native_frame, backend=backend))
def from_dict(
data: Mapping[str, Any],
schema: Mapping[str, DType] | Schema | None = None,
*,
backend: ModuleType | Implementation | str | None = None,
) -> DataFrame[Any]:
"""Instantiate DataFrame from dictionary.
Indexes (if present, for pandas-like backends) are aligned following
the [left-hand-rule](../concepts/pandas_index.md/).
Notes:
For pandas-like dataframes, conversion to schema is applied after dataframe
creation.
Arguments:
data: Dictionary to create DataFrame from.
schema: The DataFrame schema as Schema or dict of {name: type}. If not
specified, the schema will be inferred by the native library.
backend: specifies which eager backend instantiate to. Only
necessary if inputs are not Narwhals Series.
`backend` can be specified in various ways
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
Returns:
A new DataFrame.
"""
return _stableify(nw_f.from_dict(data, schema, backend=backend))
def from_numpy(
data: _2DArray,
schema: Mapping[str, DType] | Schema | Sequence[str] | None = None,
*,
backend: ModuleType | Implementation | str,
) -> DataFrame[Any]:
"""Construct a DataFrame from a NumPy ndarray.
Notes:
Only row orientation is currently supported.
For pandas-like dataframes, conversion to schema is applied after dataframe
creation.
Arguments:
data: Two-dimensional data represented as a NumPy ndarray.
schema: The DataFrame schema as Schema, dict of {name: type}, or a sequence of str.
backend: specifies which eager backend instantiate to.
`backend` can be specified in various ways
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
Returns:
A new DataFrame.
"""
return _stableify(nw_f.from_numpy(data, schema, backend=backend))
def read_csv(
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
) -> DataFrame[Any]:
"""Read a CSV file into a DataFrame.
Arguments:
source: Path to a file.
backend: The eager backend for DataFrame creation.
`backend` can be specified in various ways
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
kwargs: Extra keyword arguments which are passed to the native CSV reader.
For example, you could use
`nw.read_csv('file.csv', backend='pandas', engine='pyarrow')`.
Returns:
DataFrame.
"""
return _stableify(nw_f.read_csv(source, backend=backend, **kwargs))
def scan_csv(
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
) -> LazyFrame[Any]:
"""Lazily read from a CSV file.
For the libraries that do not support lazy dataframes, the function reads
a csv file eagerly and then converts the resulting dataframe to a lazyframe.
Arguments:
source: Path to a file.
backend: The eager backend for DataFrame creation.
`backend` can be specified in various ways
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
kwargs: Extra keyword arguments which are passed to the native CSV reader.
For example, you could use
`nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.
Returns:
LazyFrame.
"""
return _stableify(nw_f.scan_csv(source, backend=backend, **kwargs))
def read_parquet(
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
) -> DataFrame[Any]:
"""Read into a DataFrame from a parquet file.
Arguments:
source: Path to a file.
backend: The eager backend for DataFrame creation.
`backend` can be specified in various ways
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
kwargs: Extra keyword arguments which are passed to the native parquet reader.
For example, you could use
`nw.read_parquet('file.parquet', backend=pd, engine='pyarrow')`.
Returns:
DataFrame.
"""
return _stableify(nw_f.read_parquet(source, backend=backend, **kwargs))
def scan_parquet(
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
) -> LazyFrame[Any]:
"""Lazily read from a parquet file.
For the libraries that do not support lazy dataframes, the function reads
a parquet file eagerly and then converts the resulting dataframe to a lazyframe.
Note:
Spark like backends require a session object to be passed in `kwargs`.
For instance:
```py
import narwhals as nw
from sqlframe.duckdb import DuckDBSession
nw.scan_parquet(source, backend="sqlframe", session=DuckDBSession())
```
Arguments:
source: Path to a file.
backend: The eager backend for DataFrame creation.
`backend` can be specified in various ways
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
`POLARS`, `MODIN`, `CUDF`, `PYSPARK` or `SQLFRAME`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"`, `"cudf"`,
`"pyspark"` or `"sqlframe"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin`, `cudf`,
`pyspark.sql` or `sqlframe`.
kwargs: Extra keyword arguments which are passed to the native parquet reader.
For example, you could use
`nw.scan_parquet('file.parquet', backend=pd, engine='pyarrow')`.
Returns:
LazyFrame.
"""
return _stableify(nw_f.scan_parquet(source, backend=backend, **kwargs))
__all__ = [
"Array",
"Binary",
"Boolean",
"Categorical",
"DataFrame",
"Date",
"Datetime",
"Decimal",
"Duration",
"Enum",
"Expr",
"Field",
"Float32",
"Float64",
"Implementation",
"Int8",
"Int16",
"Int32",
"Int64",
"Int128",
"LazyFrame",
"List",
"Object",
"Schema",
"Series",
"String",
"Struct",
"Time",
"UInt8",
"UInt16",
"UInt32",
"UInt64",
"UInt128",
"Unknown",
"all",
"all_horizontal",
"any_horizontal",
"coalesce",
"col",
"concat",
"concat_str",
"dependencies",
"dtypes",
"dtypes",
"exceptions",
"exclude",
"from_arrow",
"from_dict",
"from_native",
"from_numpy",
"generate_temporary_column_name",
"get_native_namespace",
"is_ordered_categorical",
"len",
"lit",
"max",
"max_horizontal",
"maybe_align_index",
"maybe_convert_dtypes",
"maybe_get_index",
"maybe_reset_index",
"maybe_set_index",
"mean",
"mean_horizontal",
"median",
"min",
"min_horizontal",
"narwhalify",
"new_series",
"nth",
"read_csv",
"read_parquet",
"scan_csv",
"scan_parquet",
"selectors",
"selectors",
"show_versions",
"sum",
"sum_horizontal",
"to_native",
"to_py_scalar",
"when",
]