team-10/env/Lib/site-packages/narwhals/_pandas_like/series_str.py
2025-08-02 07:34:44 +02:00

85 lines
3.6 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING, Any
from narwhals._compliant.any_namespace import StringNamespace
from narwhals._pandas_like.utils import PandasLikeSeriesNamespace, is_dtype_pyarrow
if TYPE_CHECKING:
from narwhals._pandas_like.series import PandasLikeSeries
class PandasLikeSeriesStringNamespace(
    PandasLikeSeriesNamespace, StringNamespace["PandasLikeSeries"]
):
    """String (`.str`) operations for pandas-like series.

    Every method delegates to the native series' `.str` accessor (or to the
    native namespace's `to_datetime`) and re-wraps the result with
    `with_native`.
    """

    def len_chars(self) -> PandasLikeSeries:
        """Return the result of the native `str.len()` for each element."""
        return self.with_native(self.native.str.len())

    def replace(
        self, pattern: str, value: str, *, literal: bool, n: int
    ) -> PandasLikeSeries:
        """Replace up to `n` matches of `pattern` with `value`.

        Arguments:
            pattern: Pattern to search for.
            value: Replacement text.
            literal: If true, `pattern` is matched as a plain string;
                otherwise it is passed to the native backend as a regex.
            n: Maximum number of replacements, forwarded unchanged to the
                native `str.replace` (`replace_all` passes `-1`).
        """
        return self.with_native(
            self.native.str.replace(pat=pattern, repl=value, n=n, regex=not literal)
        )

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> PandasLikeSeries:
        """Replace every match of `pattern` with `value` (`replace` with `n=-1`)."""
        return self.replace(pattern, value, literal=literal, n=-1)

    def strip_chars(self, characters: str | None) -> PandasLikeSeries:
        """Strip `characters` from both ends via the native `str.strip`."""
        return self.with_native(self.native.str.strip(characters))

    def starts_with(self, prefix: str) -> PandasLikeSeries:
        """Return the result of the native `str.startswith(prefix)`."""
        return self.with_native(self.native.str.startswith(prefix))

    def ends_with(self, suffix: str) -> PandasLikeSeries:
        """Return the result of the native `str.endswith(suffix)`."""
        return self.with_native(self.native.str.endswith(suffix))

    def contains(self, pattern: str, *, literal: bool) -> PandasLikeSeries:
        """Test each element for `pattern`; regex matching unless `literal`."""
        return self.with_native(self.native.str.contains(pat=pattern, regex=not literal))

    def slice(self, offset: int, length: int | None) -> PandasLikeSeries:
        """Take a substring of each element.

        Arguments:
            offset: Start index; negative values count from the end.
            length: Number of characters to keep. `None` means "up to the
                end of the string"; `0` yields empty strings.
        """
        if length is None:
            stop = None
        else:
            # Compare against None explicitly: `length=0` is a valid request
            # for an empty slice and must not be treated as "no limit".
            stop = offset + length
            if offset < 0 <= stop:
                # A negative start whose window reaches (or passes) the end of
                # the string would otherwise produce a non-negative `stop`,
                # which the native slice interprets relative to the *front*
                # (e.g. `s[-3:0]` is empty). Leave the slice open-ended.
                stop = None
        return self.with_native(self.native.str.slice(start=offset, stop=stop))

    def split(self, by: str) -> PandasLikeSeries:
        """Split each element on the separator `by`.

        Raises:
            TypeError: If the implementation is not cuDF and the native
                series does not have a pyarrow dtype.
        """
        implementation = self.implementation
        if not implementation.is_cudf() and not is_dtype_pyarrow(self.native.dtype):
            msg = (
                "This operation requires a pyarrow-backed series. "
                "Please refer to https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.maybe_convert_dtypes "
                "and ensure you are using dtype_backend='pyarrow'. "
                "Additionally, make sure you have pandas version 1.5+ and pyarrow installed. "
            )
            raise TypeError(msg)
        return self.with_native(self.native.str.split(pat=by))

    def to_datetime(self, format: str | None) -> PandasLikeSeries:
        """Parse each element as a datetime.

        Arguments:
            format: Format string handed to the native `to_datetime`, or
                `None` to pass no explicit format.

        Returns:
            The parsed series. If the parsed dtype reports a time zone other
            than UTC, the result is converted to UTC.
        """
        # If we know inputs are timezone-aware, we can pass `utc=True` for better performance.
        if format and any(x in format for x in ("%z", "Z")):
            return self.with_native(self._to_datetime(format, utc=True))
        result = self.with_native(self._to_datetime(format, utc=False))
        # The format gave no hint, but parsing may still have produced a
        # non-UTC time zone; normalise it to UTC in that case.
        if (tz := getattr(result.dtype, "time_zone", None)) and tz != "UTC":
            return result.dt.convert_time_zone("UTC")
        return result

    def _to_datetime(self, format: str | None, *, utc: bool) -> Any:
        """Call the native namespace's `to_datetime` on the native series.

        When the input series has a pyarrow dtype, the parsed result is
        converted back with `convert_dtypes(dtype_backend="pyarrow")`.
        """
        result = self.implementation.to_native_namespace().to_datetime(
            self.native, format=format, utc=utc
        )
        return (
            result.convert_dtypes(dtype_backend="pyarrow")
            if is_dtype_pyarrow(self.native.dtype)
            else result
        )

    def to_date(self, format: str | None) -> PandasLikeSeries:
        """Parse each element with `to_datetime`, then take its `dt.date()`."""
        return self.to_datetime(format=format).dt.date()

    def to_uppercase(self) -> PandasLikeSeries:
        """Return the result of the native `str.upper()`."""
        return self.with_native(self.native.str.upper())

    def to_lowercase(self) -> PandasLikeSeries:
        """Return the result of the native `str.lower()`."""
        return self.with_native(self.native.str.lower())

    def zfill(self, width: int) -> PandasLikeSeries:
        """Return the result of the native `str.zfill(width)`."""
        return self.with_native(self.native.str.zfill(width))