from __future__ import annotations

from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union

from narwhals._compliant import CompliantDataFrame, CompliantLazyFrame, CompliantSeries

if TYPE_CHECKING:
    import datetime as dt
    from collections.abc import Iterable, Sequence, Sized
    from decimal import Decimal
    from types import ModuleType

    import numpy as np
    from typing_extensions import TypeAlias

    from narwhals import dtypes
    from narwhals.dataframe import DataFrame, LazyFrame
    from narwhals.expr import Expr
    from narwhals.series import Series

    # All dataframes supported by Narwhals have a
    # `columns` property. Their similarities don't extend
    # _that_ much further unfortunately...
    class NativeFrame(Protocol):
        @property
        def columns(self) -> Any: ...

        def join(self, *args: Any, **kwargs: Any) -> Any: ...

    class NativeLazyFrame(NativeFrame, Protocol):
        def explain(self, *args: Any, **kwargs: Any) -> Any: ...

    class NativeSeries(Sized, Iterable[Any], Protocol):
        def filter(self, *args: Any, **kwargs: Any) -> Any: ...

    class DataFrameLike(Protocol):
        def __dataframe__(self, *args: Any, **kwargs: Any) -> Any: ...

    class SupportsNativeNamespace(Protocol):
        def __native_namespace__(self) -> ModuleType: ...

    # ruff: noqa: N802
    class DTypes(Protocol):
        @property
        def Decimal(self) -> type[dtypes.Decimal]: ...
        @property
        def Int128(self) -> type[dtypes.Int128]: ...
        @property
        def Int64(self) -> type[dtypes.Int64]: ...
        @property
        def Int32(self) -> type[dtypes.Int32]: ...
        @property
        def Int16(self) -> type[dtypes.Int16]: ...
        @property
        def Int8(self) -> type[dtypes.Int8]: ...
        @property
        def UInt128(self) -> type[dtypes.UInt128]: ...
        @property
        def UInt64(self) -> type[dtypes.UInt64]: ...
        @property
        def UInt32(self) -> type[dtypes.UInt32]: ...
        @property
        def UInt16(self) -> type[dtypes.UInt16]: ...
        @property
        def UInt8(self) -> type[dtypes.UInt8]: ...
        @property
        def Float64(self) -> type[dtypes.Float64]: ...
        @property
        def Float32(self) -> type[dtypes.Float32]: ...
        @property
        def String(self) -> type[dtypes.String]: ...
        @property
        def Boolean(self) -> type[dtypes.Boolean]: ...
        @property
        def Object(self) -> type[dtypes.Object]: ...
        @property
        def Categorical(self) -> type[dtypes.Categorical]: ...
        @property
        def Enum(self) -> type[dtypes.Enum]: ...
        @property
        def Datetime(self) -> type[dtypes.Datetime]: ...
        @property
        def Duration(self) -> type[dtypes.Duration]: ...
        @property
        def Date(self) -> type[dtypes.Date]: ...
        @property
        def Field(self) -> type[dtypes.Field]: ...
        @property
        def Struct(self) -> type[dtypes.Struct]: ...
        @property
        def List(self) -> type[dtypes.List]: ...
        @property
        def Array(self) -> type[dtypes.Array]: ...
        @property
        def Unknown(self) -> type[dtypes.Unknown]: ...
        @property
        def Time(self) -> type[dtypes.Time]: ...
        @property
        def Binary(self) -> type[dtypes.Binary]: ...


IntoExpr: TypeAlias = Union["Expr", str, "Series[Any]"]
"""Anything which can be converted to an expression.

Use this to mean "either a Narwhals expression, or something which can be converted
into one". For example, `exprs` in `DataFrame.select` is typed to accept `IntoExpr`,
as it can either accept a `nw.Expr` (e.g. `df.select(nw.col('a'))`) or a string
which will be interpreted as a `nw.Expr`, e.g. `df.select('a')`.
"""

IntoDataFrame: TypeAlias = Union["NativeFrame", "DataFrameLike"]
"""Anything which can be converted to a Narwhals DataFrame.

Use this if your function accepts a narwhalifiable object but doesn't care about its backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoDataFrame
    >>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
    ...     df = nw.from_native(df_native, eager_only=True)
    ...     return df.shape
"""

IntoLazyFrame: TypeAlias = "NativeLazyFrame"

IntoFrame: TypeAlias = Union["IntoDataFrame", "IntoLazyFrame"]
"""Anything which can be converted to a Narwhals DataFrame or LazyFrame.

Use this if your function can accept an object which can be converted to either
`nw.DataFrame` or `nw.LazyFrame` and it doesn't care about its backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoFrame
    >>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
    ...     df = nw.from_native(df_native)
    ...     return df.collect_schema().names()
"""

Frame: TypeAlias = Union["DataFrame[Any]", "LazyFrame[Any]"]
"""Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function can work with either and your function doesn't care
about its backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import Frame
    >>> @nw.narwhalify
    ... def agnostic_columns(df: Frame) -> list[str]:
    ...     return df.columns
"""

IntoSeries: TypeAlias = "NativeSeries"
"""Anything which can be converted to a Narwhals Series.

Use this if your function can accept an object which can be converted to `nw.Series`
and it doesn't care about its backend.

Examples:
    >>> from typing import Any
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoSeries
    >>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]:
    ...     s = nw.from_native(s_native)
    ...     return s.to_list()
"""

IntoFrameT = TypeVar("IntoFrameT", bound="IntoFrame")
"""TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function accepts an object which is convertible to `nw.DataFrame`
or `nw.LazyFrame` and returns an object of the same type.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoFrameT
    >>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT:
    ...     df = nw.from_native(df_native)
    ...     return df.with_columns(c=nw.col("a") + 1).to_native()
"""

IntoDataFrameT = TypeVar("IntoDataFrameT", bound="IntoDataFrame")
"""TypeVar bound to object convertible to Narwhals DataFrame.

Use this if your function accepts an object which can be converted to `nw.DataFrame`
and returns an object of the same class.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoDataFrameT
    >>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT:
    ...     df = nw.from_native(df_native, eager_only=True)
    ...     return df.with_columns(c=df["a"] + 1).to_native()
"""

IntoLazyFrameT = TypeVar("IntoLazyFrameT", bound="IntoLazyFrame")

FrameT = TypeVar("FrameT", "DataFrame[Any]", "LazyFrame[Any]")
"""TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame.

Use this if your function accepts either `nw.DataFrame` or `nw.LazyFrame` and returns
an object of the same kind.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import FrameT
    >>> @nw.narwhalify
    ... def agnostic_func(df: FrameT) -> FrameT:
    ...     return df.with_columns(c=nw.col("a") + 1)
"""

DataFrameT = TypeVar("DataFrameT", bound="DataFrame[Any]")
"""TypeVar bound to Narwhals DataFrame.

Use this if your function can accept a Narwhals DataFrame and returns a Narwhals
DataFrame backed by the same backend.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import DataFrameT
    >>> @nw.narwhalify
    >>> def func(df: DataFrameT) -> DataFrameT:
    ...     return df.with_columns(c=df["a"] + 1)
"""

LazyFrameT = TypeVar("LazyFrameT", bound="LazyFrame[Any]")
SeriesT = TypeVar("SeriesT", bound="Series[Any]")

IntoSeriesT = TypeVar("IntoSeriesT", bound="IntoSeries")
"""TypeVar bound to object convertible to Narwhals Series.

Use this if your function accepts an object which can be converted to `nw.Series`
and returns an object of the same class.

Examples:
    >>> import narwhals as nw
    >>> from narwhals.typing import IntoSeriesT
    >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT:
    ...     s = nw.from_native(s_native, series_only=True)
    ...     return s.abs().to_native()
"""

DTypeBackend: TypeAlias = 'Literal["pyarrow", "numpy_nullable"] | None'
SizeUnit: TypeAlias = Literal[
    "b",
    "kb",
    "mb",
    "gb",
    "tb",
    "bytes",
    "kilobytes",
    "megabytes",
    "gigabytes",
    "terabytes",
]

TimeUnit: TypeAlias = Literal["ns", "us", "ms", "s"]

AsofJoinStrategy: TypeAlias = Literal["backward", "forward", "nearest"]
"""Join strategy.

- *"backward"*: Selects the last row in the right DataFrame whose `on` key
    is less than or equal to the left's key.
- *"forward"*: Selects the first row in the right DataFrame whose `on` key
    is greater than or equal to the left's key.
- *"nearest"*: Search selects the last row in the right DataFrame whose value
    is nearest to the left's key.
"""

ClosedInterval: TypeAlias = Literal["left", "right", "none", "both"]
"""Define which sides of the interval are closed (inclusive)."""

ConcatMethod: TypeAlias = Literal["horizontal", "vertical", "diagonal"]
"""Concatenating strategy.

- *"vertical"*: Concatenate vertically. Column names must match.
- *"horizontal"*: Concatenate horizontally. If lengths don't match, then
    missing rows are filled with null values.
- *"diagonal"*: Finds a union between the column schemas and fills missing
    column values with null.
"""

FillNullStrategy: TypeAlias = Literal["forward", "backward"]
"""Strategy used to fill null values."""

JoinStrategy: TypeAlias = Literal["inner", "left", "full", "cross", "semi", "anti"]
"""Join strategy.

- *"inner"*: Returns rows that have matching values in both tables.
- *"left"*: Returns all rows from the left table, and the matched rows from
    the right table.
- *"full"*: Returns all rows in both dataframes, with the `suffix` appended to
    the right join keys.
- *"cross"*: Returns the Cartesian product of rows from both tables.
- *"semi"*: Filter rows that have a match in the right table.
- *"anti"*: Filter rows that do not have a match in the right table.
"""

PivotAgg: TypeAlias = Literal[
    "min", "max", "first", "last", "sum", "mean", "median", "len"
]
"""A predefined aggregate function string."""

RankMethod: TypeAlias = Literal["average", "min", "max", "dense", "ordinal"]
"""The method used to assign ranks to tied elements.

- *"average"*: The average of the ranks that would have been assigned to
    all the tied values is assigned to each value.
- *"min"*: The minimum of the ranks that would have been assigned to all
    the tied values is assigned to each value. (This is also referred to
    as "competition" ranking.)
- *"max"*: The maximum of the ranks that would have been assigned to all
    the tied values is assigned to each value.
- *"dense"*: Like "min", but the rank of the next highest element is
    assigned the rank immediately after those assigned to the tied elements.
- *"ordinal"*: All values are given a distinct rank, corresponding to the
    order that the values occur in the Series.
"""

RollingInterpolationMethod: TypeAlias = Literal[
    "nearest", "higher", "lower", "midpoint", "linear"
]
"""Interpolation method."""

UniqueKeepStrategy: TypeAlias = Literal["any", "first", "last", "none"]
"""Which of the duplicate rows to keep.

- *"any"*: Does not give any guarantee of which row is kept.
    This allows more optimizations.
- *"none"*: Don't keep duplicate rows.
- *"first"*: Keep first unique row.
- *"last"*: Keep last unique row.
"""

LazyUniqueKeepStrategy: TypeAlias = Literal["any", "none"]
"""Which of the duplicate rows to keep.

- *"any"*: Does not give any guarantee of which row is kept.
- *"none"*: Don't keep duplicate rows.
"""


_ShapeT = TypeVar("_ShapeT", bound="tuple[int, ...]")
_NDArray: TypeAlias = "np.ndarray[_ShapeT, Any]"
_1DArray: TypeAlias = "_NDArray[tuple[int]]"  # noqa: PYI042
_1DArrayInt: TypeAlias = "np.ndarray[tuple[int], np.dtype[np.integer[Any]]]"  # noqa: PYI042
_2DArray: TypeAlias = "_NDArray[tuple[int, int]]"  # noqa: PYI042, PYI047
_AnyDArray: TypeAlias = "_NDArray[tuple[int, ...]]"  # noqa: PYI047
_NumpyScalar: TypeAlias = "np.generic[Any]"
Into1DArray: TypeAlias = "_1DArray | _NumpyScalar"
"""A 1-dimensional `numpy.ndarray` or scalar that can be converted into one."""


NumericLiteral: TypeAlias = "int | float | Decimal"
TemporalLiteral: TypeAlias = "dt.date | dt.datetime | dt.time | dt.timedelta"
NonNestedLiteral: TypeAlias = (
    "NumericLiteral | TemporalLiteral | str | bool | bytes | None"
)
PythonLiteral: TypeAlias = "NonNestedLiteral | list[Any] | tuple[Any, ...]"

NonNestedDType: TypeAlias = "dtypes.NumericType | dtypes.TemporalType | dtypes.String | dtypes.Boolean | dtypes.Binary | dtypes.Categorical | dtypes.Unknown | dtypes.Object"
"""Any Narwhals DType that does not have required arguments."""

IntoDType: TypeAlias = "dtypes.DType | type[NonNestedDType]"
"""Anything that can be converted into a Narwhals DType.

Examples:
    >>> import polars as pl
    >>> import narwhals as nw
    >>> df_native = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
    >>> df = nw.from_native(df_native)
    >>> df.select(
    ...     nw.col("a").cast(nw.Int32),
    ...     nw.col("b").cast(nw.String()).str.split(".").cast(nw.List(nw.Int8)),
    ... )
    ┌──────────────────┐
    |Narwhals DataFrame|
    |------------------|
    |shape: (3, 2)     |
    |┌─────┬──────────┐|
    |│ a   ┆ b        │|
    |│ --- ┆ ---      │|
    |│ i32 ┆ list[i8] │|
    |╞═════╪══════════╡|
    |│ 1   ┆ [4, 0]   │|
    |│ 2   ┆ [5, 0]   │|
    |│ 3   ┆ [6, 0]   │|
    |└─────┴──────────┘|
    └──────────────────┘
"""


# Annotations for `__getitem__` methods
_T = TypeVar("_T")
_Slice: TypeAlias = "slice[_T, Any, Any] | slice[Any, _T, Any] | slice[None, None, _T]"
_SliceNone: TypeAlias = "slice[None, None, None]"
# Index/column positions
SingleIndexSelector: TypeAlias = int
_SliceIndex: TypeAlias = "_Slice[int] | _SliceNone"
"""E.g. `[1:]` or `[:3]` or `[::2]`."""
SizedMultiIndexSelector: TypeAlias = "Sequence[int] | _T | _1DArrayInt"
MultiIndexSelector: TypeAlias = "_SliceIndex | SizedMultiIndexSelector[_T]"
# Labels/column names
SingleNameSelector: TypeAlias = str
_SliceName: TypeAlias = "_Slice[str] | _SliceNone"
SizedMultiNameSelector: TypeAlias = "Sequence[str] | _T | _1DArray"
MultiNameSelector: TypeAlias = "_SliceName | SizedMultiNameSelector[_T]"
# Mixed selectors
SingleColSelector: TypeAlias = "SingleIndexSelector | SingleNameSelector"
MultiColSelector: TypeAlias = "MultiIndexSelector[_T] | MultiNameSelector[_T]"


__all__ = [
    "CompliantDataFrame",
    "CompliantLazyFrame",
    "CompliantSeries",
    "DataFrameT",
    "Frame",
    "FrameT",
    "IntoDataFrame",
    "IntoDataFrameT",
    "IntoExpr",
    "IntoFrame",
    "IntoFrameT",
    "IntoSeries",
    "IntoSeriesT",
]