from __future__ import annotations

from typing import TYPE_CHECKING

from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import DateTimeNamespace
from narwhals._constants import US_PER_SECOND
from narwhals._duration import Interval
from narwhals._spark_like.utils import (
    UNITS_DICT,
    fetch_session_time_zone,
    strptime_to_pyspark_format,
)
from narwhals._utils import not_implemented

if TYPE_CHECKING:
    from collections.abc import Sequence

    from sqlframe.base.column import Column

    from narwhals._spark_like.dataframe import SparkLikeLazyFrame
    from narwhals._spark_like.expr import SparkLikeExpr


class SparkLikeExprDateTimeNamespace(
    LazyExprNamespace["SparkLikeExpr"], DateTimeNamespace["SparkLikeExpr"]
):
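    """Datetime namespace (`Expr.dt.*`) for Spark-like (PySpark / sqlframe) expressions."""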

    def _weekday(self, expr: Column) -> Column:
        # PySpark's `dayofweek` returns 1-7 for Sunday-Saturday; shift it to
        # the ISO numbering, 1-7 for Monday-Sunday.
        return (self.compliant._F.dayofweek(expr) + 5) % 7 + 1
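
    # For example, under the mapping above (`dayofweek`: Sunday=1 ... Saturday=7):
    #   Monday    -> (2 + 5) % 7 + 1 == 1
    #   Wednesday -> (4 + 5) % 7 + 1 == 3
    #   Sunday    -> (1 + 5) % 7 + 1 == 7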

    def to_string(self, format: str) -> SparkLikeExpr:
        F = self.compliant._F  # noqa: N806

        def _to_string(expr: Column) -> Column:
            # Handle the special ISO week formats separately.
            if format == "%G-W%V":
                return self._format_iso_week(expr)
            if format == "%G-W%V-%u":
                return self._format_iso_week_with_day(expr)

            format_, suffix = self._format_microseconds(expr, format)

            # Convert the Python strptime-style format to PySpark's format.
            pyspark_fmt = strptime_to_pyspark_format(format_)

            result = F.date_format(expr, pyspark_fmt)
            if "T" in format_:
                # `strptime_to_pyspark_format` replaces "T" with " " since
                # PySpark does not support the literal "T" in `date_format`.
                # If no other spaces appear in the given format, we can revert
                # this substitution; otherwise we raise an exception.
                if " " not in format_:
                    result = F.replace(result, F.lit(" "), F.lit("T"))
                else:  # pragma: no cover
                    msg = (
                        "`dt.to_string` with a format that contains both spaces "
                        "and the literal 'T' is not supported for spark-like "
                        "backends."
                    )
                    raise NotImplementedError(msg)

            return F.concat(result, *suffix)

        return self.compliant._with_elementwise(_to_string)
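
    # Illustrative example (assuming the user-facing narwhals API as `nw`):
    #   nw.col("ts").dt.to_string("%Y-%m-%dT%H:%M:%S")
    # renders 2024-03-15 13:45:30 as "2024-03-15T13:45:30": the format has no
    # other spaces, so the "T" that `strptime_to_pyspark_format` turned into a
    # space is restored by the `F.replace` above.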

    def date(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self.compliant._F.to_date)

    def year(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self.compliant._F.year)

    def month(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self.compliant._F.month)

    def day(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self.compliant._F.day)

    def hour(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self.compliant._F.hour)

    def minute(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self.compliant._F.minute)

    def second(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self.compliant._F.second)
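
    # E.g. for a timestamp of 2024-03-15 13:45:30, `year()` yields 2024,
    # `month()` 3, `day()` 15, `hour()` 13, `minute()` 45 and `second()` 30;
    # each accessor maps one-to-one onto a PySpark column function.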

    def millisecond(self) -> SparkLikeExpr:
        def _millisecond(expr: Column) -> Column:
            return self.compliant._F.floor(
                (self.compliant._F.unix_micros(expr) % US_PER_SECOND) / 1000
            )

        return self.compliant._with_elementwise(_millisecond)

    def microsecond(self) -> SparkLikeExpr:
        def _microsecond(expr: Column) -> Column:
            return self.compliant._F.unix_micros(expr) % US_PER_SECOND

        return self.compliant._with_elementwise(_microsecond)

    def nanosecond(self) -> SparkLikeExpr:
        def _nanosecond(expr: Column) -> Column:
            return (self.compliant._F.unix_micros(expr) % US_PER_SECOND) * 1000

        return self.compliant._with_elementwise(_nanosecond)
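
    # All three sub-second accessors derive from `unix_micros`, since PySpark
    # timestamps have microsecond resolution. For a time of 00:00:00.123456:
    #   microsecond() -> 123456
    #   millisecond() -> 123   (floor(123456 / 1000))
    #   nanosecond()  -> 123456000   (123456 * 1000)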

    def ordinal_day(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self.compliant._F.dayofyear)

    def weekday(self) -> SparkLikeExpr:
        return self.compliant._with_elementwise(self._weekday)
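
    # E.g. 2024-03-15 (a Friday in a leap year) has ordinal_day 75 and
    # weekday 5 under the ISO Monday=1 ... Sunday=7 numbering.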

    def truncate(self, every: str) -> SparkLikeExpr:
        interval = Interval.parse(every)
        multiple, unit = interval.multiple, interval.unit
        if multiple != 1:
            msg = f"Only multiple 1 is currently supported for Spark-like.\nGot {multiple!s}."
            raise ValueError(msg)
        if unit == "ns":
            msg = "Truncating to nanoseconds is not yet supported for Spark-like."
            raise NotImplementedError(msg)
        format = UNITS_DICT[unit]

        def _truncate(expr: Column) -> Column:
            return self.compliant._F.date_trunc(format, expr)

        return self.compliant._with_elementwise(_truncate)
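
    # For instance, `truncate("1mo")` parses to multiple=1, unit="mo"; assuming
    # UNITS_DICT maps "mo" to Spark's "month", 2024-03-15 13:45:30 truncates to
    # 2024-03-01 00:00:00 via `date_trunc`.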

    def offset_by(self, by: str) -> SparkLikeExpr:
        interval = Interval.parse_no_constraints(by)
        multiple, unit = interval.multiple, interval.unit
        if unit == "ns":  # pragma: no cover
            msg = "Offsetting by nanoseconds is not yet supported for Spark-like."
            raise NotImplementedError(msg)

        F = self.compliant._F  # noqa: N806

        def _offset_by(expr: Column) -> Column:
            # https://github.com/eakmanrq/sqlframe/issues/441
            return F.timestamp_add(  # pyright: ignore[reportAttributeAccessIssue]
                UNITS_DICT[unit], F.lit(multiple), expr
            )

        return self.compliant._with_callable(_offset_by)
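
    # For example, `offset_by("2d")` parses to multiple=2, unit="d"; assuming
    # UNITS_DICT maps "d" to "day", this becomes `F.timestamp_add("day",
    # F.lit(2), expr)`, shifting 2024-03-15 to 2024-03-17. The pyright ignore
    # works around the sqlframe stub issue linked above.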

    def _no_op_time_zone(self, time_zone: str) -> SparkLikeExpr:  # pragma: no cover
        def func(df: SparkLikeLazyFrame) -> Sequence[Column]:
            native_series_list = self.compliant(df)
            conn_time_zone = fetch_session_time_zone(df.native.sparkSession)
            if conn_time_zone != time_zone:
                msg = (
                    "PySpark stores the time zone in the session, rather than in the "
                    f"data type, so changing the time zone to anything other than "
                    f"{conn_time_zone} (the current session time zone) is not supported."
                )
                raise NotImplementedError(msg)
            return native_series_list

        return self.compliant.__class__(
            func,
            evaluate_output_names=self.compliant._evaluate_output_names,
            alias_output_names=self.compliant._alias_output_names,
            version=self.compliant._version,
            implementation=self.compliant._implementation,
        )

    def convert_time_zone(self, time_zone: str) -> SparkLikeExpr:  # pragma: no cover
        return self._no_op_time_zone(time_zone)

    def replace_time_zone(
        self, time_zone: str | None
    ) -> SparkLikeExpr:  # pragma: no cover
        if time_zone is None:
            return self.compliant._with_elementwise(
                lambda expr: expr.cast("timestamp_ntz")
            )
        else:
            return self._no_op_time_zone(time_zone)
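
    # In other words, both time-zone methods only succeed when the requested
    # zone matches the session's time zone (`spark.sql.session.timeZone`),
    # except that `replace_time_zone(None)` can always drop the zone via a
    # "timestamp_ntz" cast; anything else raises NotImplementedError.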

    def _format_iso_week_with_day(self, expr: Column) -> Column:
        """Format datetime as ISO week string with day."""
        F = self.compliant._F  # noqa: N806

        year = F.date_format(expr, "yyyy")
        week = F.lpad(F.weekofyear(expr).cast("string"), 2, "0")
        day = self._weekday(expr)
        return F.concat(year, F.lit("-W"), week, F.lit("-"), day.cast("string"))

    def _format_iso_week(self, expr: Column) -> Column:
        """Format datetime as ISO week string."""
        F = self.compliant._F  # noqa: N806

        year = F.date_format(expr, "yyyy")
        week = F.lpad(F.weekofyear(expr).cast("string"), 2, "0")
        return F.concat(year, F.lit("-W"), week)
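
    # E.g. 2024-01-04 (ISO week 1, Thursday) formats as "2024-W01" and
    # "2024-W01-4" respectively. Note the year component uses the calendar
    # year ("yyyy"), which coincides with the ISO week-based year except near
    # year boundaries.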

    def _format_microseconds(
        self, expr: Column, format: str
    ) -> tuple[str, tuple[Column, ...]]:
        """Format microseconds if present in the format; otherwise a no-op."""
        F = self.compliant._F  # noqa: N806

        suffix: tuple[Column, ...]
        if format.endswith((".%f", "%.f")):
            import re

            micros = F.unix_micros(expr) % US_PER_SECOND
            micros_str = F.lpad(micros.cast("string"), 6, "0")
            suffix = (F.lit("."), micros_str)
            format_ = re.sub(r"(.%|%.)f$", "", format)
            return format_, suffix

        return format, ()
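
    # For example, "%Y-%m-%dT%H:%M:%S.%f" splits into "%Y-%m-%dT%H:%M:%S" plus
    # the suffix (lit("."), <micros>); a microsecond component of 1234 is
    # left-padded to "001234", so the rendered string ends in ".001234".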

    timestamp = not_implemented()
    total_seconds = not_implemented()
    total_minutes = not_implemented()
    total_milliseconds = not_implemented()
    total_microseconds = not_implemented()
    total_nanoseconds = not_implemented()