85 lines
3.6 KiB
Python
85 lines
3.6 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING, Any
|
|
|
|
from narwhals._compliant.any_namespace import StringNamespace
|
|
from narwhals._pandas_like.utils import PandasLikeSeriesNamespace, is_dtype_pyarrow
|
|
|
|
if TYPE_CHECKING:
|
|
from narwhals._pandas_like.series import PandasLikeSeries
|
|
|
|
|
|
class PandasLikeSeriesStringNamespace(
|
|
PandasLikeSeriesNamespace, StringNamespace["PandasLikeSeries"]
|
|
):
|
|
def len_chars(self) -> PandasLikeSeries:
|
|
return self.with_native(self.native.str.len())
|
|
|
|
def replace(
|
|
self, pattern: str, value: str, *, literal: bool, n: int
|
|
) -> PandasLikeSeries:
|
|
return self.with_native(
|
|
self.native.str.replace(pat=pattern, repl=value, n=n, regex=not literal)
|
|
)
|
|
|
|
def replace_all(self, pattern: str, value: str, *, literal: bool) -> PandasLikeSeries:
|
|
return self.replace(pattern, value, literal=literal, n=-1)
|
|
|
|
def strip_chars(self, characters: str | None) -> PandasLikeSeries:
|
|
return self.with_native(self.native.str.strip(characters))
|
|
|
|
def starts_with(self, prefix: str) -> PandasLikeSeries:
|
|
return self.with_native(self.native.str.startswith(prefix))
|
|
|
|
def ends_with(self, suffix: str) -> PandasLikeSeries:
|
|
return self.with_native(self.native.str.endswith(suffix))
|
|
|
|
def contains(self, pattern: str, *, literal: bool) -> PandasLikeSeries:
|
|
return self.with_native(self.native.str.contains(pat=pattern, regex=not literal))
|
|
|
|
def slice(self, offset: int, length: int | None) -> PandasLikeSeries:
|
|
stop = offset + length if length else None
|
|
return self.with_native(self.native.str.slice(start=offset, stop=stop))
|
|
|
|
def split(self, by: str) -> PandasLikeSeries:
|
|
implementation = self.implementation
|
|
if not implementation.is_cudf() and not is_dtype_pyarrow(self.native.dtype):
|
|
msg = (
|
|
"This operation requires a pyarrow-backed series. "
|
|
"Please refer to https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.maybe_convert_dtypes "
|
|
"and ensure you are using dtype_backend='pyarrow'. "
|
|
"Additionally, make sure you have pandas version 1.5+ and pyarrow installed. "
|
|
)
|
|
raise TypeError(msg)
|
|
return self.with_native(self.native.str.split(pat=by))
|
|
|
|
def to_datetime(self, format: str | None) -> PandasLikeSeries:
|
|
# If we know inputs are timezone-aware, we can pass `utc=True` for better performance.
|
|
if format and any(x in format for x in ("%z", "Z")):
|
|
return self.with_native(self._to_datetime(format, utc=True))
|
|
result = self.with_native(self._to_datetime(format, utc=False))
|
|
if (tz := getattr(result.dtype, "time_zone", None)) and tz != "UTC":
|
|
return result.dt.convert_time_zone("UTC")
|
|
return result
|
|
|
|
def _to_datetime(self, format: str | None, *, utc: bool) -> Any:
|
|
result = self.implementation.to_native_namespace().to_datetime(
|
|
self.native, format=format, utc=utc
|
|
)
|
|
return (
|
|
result.convert_dtypes(dtype_backend="pyarrow")
|
|
if is_dtype_pyarrow(self.native.dtype)
|
|
else result
|
|
)
|
|
|
|
def to_date(self, format: str | None) -> PandasLikeSeries:
|
|
return self.to_datetime(format=format).dt.date()
|
|
|
|
def to_uppercase(self) -> PandasLikeSeries:
|
|
return self.with_native(self.native.str.upper())
|
|
|
|
def to_lowercase(self) -> PandasLikeSeries:
|
|
return self.with_native(self.native.str.lower())
|
|
|
|
def zfill(self, width: int) -> PandasLikeSeries:
|
|
return self.with_native(self.native.str.zfill(width))
|