team-10/env/Lib/site-packages/narwhals/dependencies.py
2025-08-02 07:34:44 +02:00

589 lines
18 KiB
Python

# pandas / Polars / etc. : if a user passes a dataframe from one of these
# libraries, it means they must already have imported the given module.
# So, we can just check sys.modules.
from __future__ import annotations
import sys
from typing import TYPE_CHECKING, Any
from narwhals._exceptions import issue_warning
if TYPE_CHECKING:
import cudf
import dask.dataframe as dd
import duckdb
import ibis
import modin.pandas as mpd
import pandas as pd
import polars as pl
import pyarrow as pa
import pyspark.sql as pyspark_sql
from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame
from typing_extensions import TypeGuard, TypeIs
from narwhals._spark_like.dataframe import SQLFrameDataFrame
from narwhals.dataframe import DataFrame, LazyFrame
from narwhals.series import Series
from narwhals.typing import (
IntoDataFrameT,
IntoLazyFrameT,
IntoSeriesT,
_1DArray,
_1DArrayInt,
_2DArray,
_NDArray,
_NumpyScalar,
_ShapeT,
)
# We silently allow these but - given that they claim
# to be drop-in replacements for pandas - testing is
# their responsibility.
IMPORT_HOOKS = frozenset(["fireducks"])
def get_polars() -> Any:
"""Get Polars module (if already imported - else return None)."""
return sys.modules.get("polars", None)
def get_pandas() -> Any:
"""Get pandas module (if already imported - else return None)."""
return sys.modules.get("pandas", None)
def get_modin() -> Any: # pragma: no cover
"""Get modin.pandas module (if already imported - else return None)."""
if (modin := sys.modules.get("modin", None)) is not None:
return modin.pandas
return None
def get_cudf() -> Any:
"""Get cudf module (if already imported - else return None)."""
return sys.modules.get("cudf", None)
def get_cupy() -> Any:
"""Get cupy module (if already imported - else return None)."""
return sys.modules.get("cupy", None)
def get_pyarrow() -> Any: # pragma: no cover
"""Get pyarrow module (if already imported - else return None)."""
return sys.modules.get("pyarrow", None)
def get_numpy() -> Any:
"""Get numpy module (if already imported - else return None)."""
return sys.modules.get("numpy", None)
def get_dask() -> Any: # pragma: no cover
"""Get dask (if already imported - else return None)."""
return sys.modules.get("dask", None)
def get_dask_dataframe() -> Any:
"""Get dask.dataframe module (if already imported - else return None)."""
return sys.modules.get("dask.dataframe", None)
def get_duckdb() -> Any:
"""Get duckdb module (if already imported - else return None)."""
return sys.modules.get("duckdb", None)
def get_ibis() -> Any:
"""Get ibis module (if already imported - else return None)."""
return sys.modules.get("ibis", None)
def get_dask_expr() -> Any: # pragma: no cover
"""Get dask_expr module (if already imported - else return None)."""
if (dd := get_dask_dataframe()) is not None and hasattr(dd, "dask_expr"):
return dd.dask_expr
return sys.modules.get("dask_expr", None)
def get_pyspark() -> Any: # pragma: no cover
"""Get pyspark module (if already imported - else return None)."""
return sys.modules.get("pyspark", None)
def get_pyspark_sql() -> Any:
"""Get pyspark.sql module (if already imported - else return None)."""
return sys.modules.get("pyspark.sql", None)
def get_pyspark_connect() -> Any:
"""Get pyspark.sql.connect module (if already imported - else return None)."""
return sys.modules.get("pyspark.sql.connect", None)
def get_sqlframe() -> Any:
"""Get sqlframe module (if already imported - else return None)."""
return sys.modules.get("sqlframe", None)
def _warn_if_narwhals_df_or_lf(df: Any) -> None:
if is_narwhals_dataframe(df) or is_narwhals_lazyframe(df):
msg = (
f"You passed a `{type(df)}` to `is_pandas_dataframe`.\n\n"
"Hint: Instead of e.g. `is_pandas_dataframe(df)`, "
"did you mean `is_pandas_dataframe(df.to_native())`?"
)
issue_warning(msg, UserWarning)
def _warn_if_narwhals_series(ser: Any) -> None:
if is_narwhals_series(ser):
msg = (
f"You passed a `{type(ser)}` to `is_pandas_series`.\n\n"
"Hint: Instead of e.g. `is_pandas_series(ser)`, "
"did you mean `is_pandas_series(ser.to_native())`?"
)
issue_warning(msg, UserWarning)
def is_pandas_dataframe(df: Any) -> TypeIs[pd.DataFrame]:
"""Check whether `df` is a pandas DataFrame without importing pandas.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return ((pd := get_pandas()) is not None and isinstance(df, pd.DataFrame)) or any(
(mod := sys.modules.get(module_name, None)) is not None
and isinstance(df, mod.pandas.DataFrame)
for module_name in IMPORT_HOOKS
)
def is_pandas_series(ser: Any) -> TypeIs[pd.Series[Any]]:
"""Check whether `ser` is a pandas Series without importing pandas.
Warning:
This method cannot be called on Narwhals Series.
"""
_warn_if_narwhals_series(ser)
return ((pd := get_pandas()) is not None and isinstance(ser, pd.Series)) or any(
(mod := sys.modules.get(module_name, None)) is not None
and isinstance(ser, mod.pandas.Series)
for module_name in IMPORT_HOOKS
)
def is_pandas_index(index: Any) -> TypeIs[pd.Index[Any]]:
"""Check whether `index` is a pandas Index without importing pandas."""
return ((pd := get_pandas()) is not None and isinstance(index, pd.Index)) or any(
(mod := sys.modules.get(module_name, None)) is not None
and isinstance(index, mod.pandas.Index)
for module_name in IMPORT_HOOKS
)
def is_modin_dataframe(df: Any) -> TypeIs[mpd.DataFrame]:
"""Check whether `df` is a modin DataFrame without importing modin.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return (mpd := get_modin()) is not None and isinstance(df, mpd.DataFrame)
def is_modin_series(ser: Any) -> TypeIs[mpd.Series]:
"""Check whether `ser` is a modin Series without importing modin.
Warning:
This method cannot be called on Narwhals Series.
"""
_warn_if_narwhals_series(ser)
return (mpd := get_modin()) is not None and isinstance(ser, mpd.Series)
def is_modin_index(index: Any) -> TypeIs[mpd.Index[Any]]: # pragma: no cover
"""Check whether `index` is a modin Index without importing modin."""
return (mpd := get_modin()) is not None and isinstance(index, mpd.Index)
def is_cudf_dataframe(df: Any) -> TypeIs[cudf.DataFrame]:
"""Check whether `df` is a cudf DataFrame without importing cudf.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return (cudf := get_cudf()) is not None and isinstance(df, cudf.DataFrame)
def is_cudf_series(ser: Any) -> TypeIs[cudf.Series[Any]]:
"""Check whether `ser` is a cudf Series without importing cudf.
Warning:
This method cannot be called on Narwhals Series.
"""
_warn_if_narwhals_series(ser)
return (cudf := get_cudf()) is not None and isinstance(ser, cudf.Series)
def is_cudf_index(index: Any) -> TypeIs[cudf.Index]:
"""Check whether `index` is a cudf Index without importing cudf."""
return (cudf := get_cudf()) is not None and isinstance(
index, cudf.Index
) # pragma: no cover
def is_cupy_scalar(obj: Any) -> bool:
return (
(cupy := get_cupy()) is not None
and isinstance(obj, cupy.ndarray)
and obj.size == 1
) # pragma: no cover
def is_dask_dataframe(df: Any) -> TypeIs[dd.DataFrame]:
"""Check whether `df` is a Dask DataFrame without importing Dask.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return (dd := get_dask_dataframe()) is not None and isinstance(df, dd.DataFrame)
def is_duckdb_relation(df: Any) -> TypeIs[duckdb.DuckDBPyRelation]:
"""Check whether `df` is a DuckDB Relation without importing DuckDB.
Warning:
This method cannot be called on Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return (duckdb := get_duckdb()) is not None and isinstance(
df, duckdb.DuckDBPyRelation
)
def is_ibis_table(df: Any) -> TypeIs[ibis.Table]:
"""Check whether `df` is a Ibis Table without importing Ibis.
Warning:
This method cannot be called on Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return (ibis := get_ibis()) is not None and isinstance(df, ibis.expr.types.Table)
def is_polars_dataframe(df: Any) -> TypeIs[pl.DataFrame]:
"""Check whether `df` is a Polars DataFrame without importing Polars.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return (pl := get_polars()) is not None and isinstance(df, pl.DataFrame)
def is_polars_lazyframe(df: Any) -> TypeIs[pl.LazyFrame]:
"""Check whether `df` is a Polars LazyFrame without importing Polars.
Warning:
This method cannot be called on Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return (pl := get_polars()) is not None and isinstance(df, pl.LazyFrame)
def is_polars_series(ser: Any) -> TypeIs[pl.Series]:
"""Check whether `ser` is a Polars Series without importing Polars.
Warning:
This method cannot be called on Narwhals Series.
"""
_warn_if_narwhals_series(ser)
return (pl := get_polars()) is not None and isinstance(ser, pl.Series)
def is_pyarrow_chunked_array(ser: Any) -> TypeIs[pa.ChunkedArray[Any]]:
"""Check whether `ser` is a PyArrow ChunkedArray without importing PyArrow.
Warning:
This method cannot be called on Narwhals Series.
"""
_warn_if_narwhals_series(ser)
return (pa := get_pyarrow()) is not None and isinstance(ser, pa.ChunkedArray)
def is_pyarrow_table(df: Any) -> TypeIs[pa.Table]:
"""Check whether `df` is a PyArrow Table without importing PyArrow.
Warning:
This method cannot be called on Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return (pa := get_pyarrow()) is not None and isinstance(df, pa.Table)
def is_pyarrow_scalar(obj: Any) -> TypeIs[pa.Scalar[Any]]:
return (pa := get_pyarrow()) is not None and isinstance(obj, pa.Scalar)
def is_pyspark_dataframe(df: Any) -> TypeIs[pyspark_sql.DataFrame]:
"""Check whether `df` is a PySpark DataFrame without importing PySpark.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return bool(
(pyspark_sql := get_pyspark_sql()) is not None
and isinstance(df, pyspark_sql.DataFrame)
)
def is_pyspark_connect_dataframe(df: Any) -> TypeIs[PySparkConnectDataFrame]:
"""Check whether `df` is a PySpark Connect DataFrame without importing PySpark.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
if get_pyspark_connect() is not None: # pragma: no cover
try:
from pyspark.sql.connect.dataframe import DataFrame
except ImportError:
return False
return isinstance(df, DataFrame)
return False
def is_sqlframe_dataframe(df: Any) -> TypeIs[SQLFrameDataFrame]:
"""Check whether `df` is a SQLFrame DataFrame without importing SQLFrame.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
if get_sqlframe() is not None:
from sqlframe.base.dataframe import BaseDataFrame
return isinstance(df, BaseDataFrame)
return False # pragma: no cover
def is_numpy_array(arr: Any | _NDArray[_ShapeT]) -> TypeIs[_NDArray[_ShapeT]]:
"""Check whether `arr` is a NumPy Array without importing NumPy."""
return (np := get_numpy()) is not None and isinstance(arr, np.ndarray)
def is_numpy_array_1d(arr: Any) -> TypeIs[_1DArray]:
"""Check whether `arr` is a 1D NumPy Array without importing NumPy."""
return is_numpy_array(arr) and arr.ndim == 1
def is_numpy_array_1d_int(arr: Any) -> TypeIs[_1DArrayInt]:
return (
(np := get_numpy())
and is_numpy_array_1d(arr)
and np.issubdtype(arr.dtype, np.integer)
)
def is_numpy_array_2d(arr: Any) -> TypeIs[_2DArray]:
"""Check whether `arr` is a 2D NumPy Array without importing NumPy."""
return is_numpy_array(arr) and arr.ndim == 2
def is_numpy_scalar(scalar: Any) -> TypeGuard[_NumpyScalar]:
"""Check whether `scalar` is a NumPy Scalar without importing NumPy."""
# NOTE: Needs to stay as `TypeGuard`
# - Used in `Series.__getitem__`, but not annotated
# - `TypeGuard` is *hiding* that the check introduces an intersection
return (np := get_numpy()) is not None and isinstance(scalar, np.generic)
def is_pandas_like_dataframe(df: Any) -> bool:
"""Check whether `df` is a pandas-like DataFrame without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
Warning:
This method cannot be called on a Narwhals DataFrame/LazyFrame.
"""
_warn_if_narwhals_df_or_lf(df)
return is_pandas_dataframe(df) or is_modin_dataframe(df) or is_cudf_dataframe(df)
def is_pandas_like_series(ser: Any) -> bool:
"""Check whether `ser` is a pandas-like Series without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
Warning:
This method cannot be called on Narwhals Series.
"""
_warn_if_narwhals_series(ser)
return is_pandas_series(ser) or is_modin_series(ser) or is_cudf_series(ser)
def is_pandas_like_index(index: Any) -> bool:
"""Check whether `index` is a pandas-like Index without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
"""
return (
is_pandas_index(index) or is_modin_index(index) or is_cudf_index(index)
) # pragma: no cover
def is_into_series(native_series: Any | IntoSeriesT) -> TypeIs[IntoSeriesT]:
"""Check whether `native_series` can be converted to a Narwhals Series.
Arguments:
native_series: The object to check.
Returns:
`True` if `native_series` can be converted to a Narwhals Series, `False` otherwise.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import numpy as np
>>> import narwhals as nw
>>> s_pd = pd.Series([1, 2, 3])
>>> s_pl = pl.Series([1, 2, 3])
>>> np_arr = np.array([1, 2, 3])
>>> nw.dependencies.is_into_series(s_pd)
True
>>> nw.dependencies.is_into_series(s_pl)
True
>>> nw.dependencies.is_into_series(np_arr)
False
"""
from narwhals.series import Series
return (
isinstance(native_series, Series)
or hasattr(native_series, "__narwhals_series__")
or is_polars_series(native_series)
or is_pyarrow_chunked_array(native_series)
or is_pandas_like_series(native_series)
)
def is_into_dataframe(native_dataframe: Any | IntoDataFrameT) -> TypeIs[IntoDataFrameT]:
"""Check whether `native_dataframe` can be converted to a Narwhals DataFrame.
Arguments:
native_dataframe: The object to check.
Returns:
`True` if `native_dataframe` can be converted to a Narwhals DataFrame, `False` otherwise.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import numpy as np
>>> from narwhals.dependencies import is_into_dataframe
>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])
>>> is_into_dataframe(df_pd)
True
>>> is_into_dataframe(df_pl)
True
>>> is_into_dataframe(np_arr)
False
"""
from narwhals.dataframe import DataFrame
return (
isinstance(native_dataframe, DataFrame)
or hasattr(native_dataframe, "__narwhals_dataframe__")
or is_polars_dataframe(native_dataframe)
or is_pyarrow_table(native_dataframe)
or is_pandas_like_dataframe(native_dataframe)
)
def is_narwhals_dataframe(
df: DataFrame[IntoDataFrameT] | Any,
) -> TypeIs[DataFrame[IntoDataFrameT]]:
"""Check whether `df` is a Narwhals DataFrame.
This is useful if you expect a user to pass in a Narwhals
DataFrame directly, and you want to catch both `narwhals.DataFrame`
and `narwhals.stable.v1.DataFrame`.
"""
from narwhals.dataframe import DataFrame
return isinstance(df, DataFrame)
def is_narwhals_lazyframe(
lf: Any | LazyFrame[IntoLazyFrameT],
) -> TypeIs[LazyFrame[IntoLazyFrameT]]:
"""Check whether `lf` is a Narwhals LazyFrame.
This is useful if you expect a user to pass in a Narwhals
LazyFrame directly, and you want to catch both `narwhals.LazyFrame`
and `narwhals.stable.v1.LazyFrame`.
"""
from narwhals.dataframe import LazyFrame
return isinstance(lf, LazyFrame)
def is_narwhals_series(ser: Any | Series[IntoSeriesT]) -> TypeIs[Series[IntoSeriesT]]:
"""Check whether `ser` is a Narwhals Series.
This is useful if you expect a user to pass in a Narwhals
Series directly, and you want to catch both `narwhals.Series`
and `narwhals.stable.v1.Series`.
"""
from narwhals.series import Series
return isinstance(ser, Series)
def is_narwhals_series_int(ser: Any | Series[IntoSeriesT]) -> TypeIs[Series[IntoSeriesT]]:
return is_narwhals_series(ser) and ser.dtype.is_integer()
__all__ = [
"get_cudf",
"get_ibis",
"get_modin",
"get_numpy",
"get_pandas",
"get_polars",
"get_pyarrow",
"is_cudf_dataframe",
"is_cudf_series",
"is_dask_dataframe",
"is_ibis_table",
"is_into_dataframe",
"is_into_series",
"is_modin_dataframe",
"is_modin_series",
"is_narwhals_dataframe",
"is_narwhals_lazyframe",
"is_narwhals_series",
"is_numpy_array",
"is_pandas_dataframe",
"is_pandas_index",
"is_pandas_like_dataframe",
"is_pandas_like_series",
"is_pandas_series",
"is_polars_dataframe",
"is_polars_lazyframe",
"is_polars_series",
"is_pyarrow_chunked_array",
"is_pyarrow_table",
]