# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE: We ignore all mypy import-not-found errors at the top level since
# this module is optional and the SQLAlchemy dependency is not installed
# by default.
# mypy: disable-error-code="import-not-found, redundant-cast"

from __future__ import annotations

from collections import ChainMap
from copy import deepcopy
from typing import TYPE_CHECKING, Any, cast

from streamlit.connections import BaseConnection
from streamlit.connections.util import extract_from_dict
from streamlit.errors import StreamlitAPIException
from streamlit.runtime.caching import cache_data

if TYPE_CHECKING:
    from datetime import timedelta

    from pandas import DataFrame
    from sqlalchemy.engine import Connection as SQLAlchemyConnection
    from sqlalchemy.engine.base import Engine
    from sqlalchemy.orm import Session


# Parameters that must be present (in kwargs or secrets) whenever no ``url``
# is supplied; ``SQLConnection._connect`` checks each of them.
_REQUIRED_CONNECTION_PARAMS = {"dialect", "username", "host"}

# Every key that ``SQLConnection._connect`` treats as a connection parameter
# rather than as a keyword argument for ``sqlalchemy.create_engine()``.
_ALL_CONNECTION_PARAMS = _REQUIRED_CONNECTION_PARAMS | {
    "url",
    "driver",
    "password",
    "port",
    "database",
    "query",
}


class SQLConnection(BaseConnection["Engine"]):
    """A connection to a SQL database using a SQLAlchemy Engine.

    Initialize this connection object using ``st.connection("sql")`` or
    ``st.connection("<name>", type="sql")``. Connection parameters for a
    SQLConnection can be specified using ``secrets.toml`` and/or ``**kwargs``.
    Possible connection parameters include:

    - ``url`` or keyword arguments for |sqlalchemy.engine.URL.create()|_, except
      ``drivername``. Use ``dialect`` and ``driver`` instead of ``drivername``.
    - Keyword arguments for |sqlalchemy.create_engine()|_, including custom
      ``connect()`` arguments used by your specific ``dialect`` or ``driver``.
    - ``autocommit``. If this is ``False`` (default), the connection operates
      in manual commit (transactional) mode. If this is ``True``, the
      connection operates in autocommit (non-transactional) mode.

    If ``url`` exists as a connection parameter, Streamlit will pass it to
    |sqlalchemy.engine.make_url()|_. Otherwise, Streamlit requires (at a
    minimum) ``dialect``, ``username``, and ``host``. Streamlit will use
    ``dialect`` and ``driver`` (if defined) to derive ``drivername``, then pass
    the relevant connection parameters to ``sqlalchemy.engine.URL.create()``.

    In addition to the default keyword arguments for ``sqlalchemy.create_engine()``,
    your dialect may accept additional keyword arguments. For example, if you
    use ``dialect="snowflake"`` with `Snowflake SQLAlchemy
    <https://github.com/snowflakedb/snowflake-sqlalchemy#key-pair-authentication-support>`_,
    you can pass a value for ``private_key`` to use key-pair authentication. If
    you use ``dialect="bigquery"`` with `Google BigQuery
    <https://github.com/googleapis/python-bigquery-sqlalchemy#authentication>`_,
    you can pass a value for ``location``.

    SQLConnection provides the ``.query()`` convenience method, which can be
    used to run simple, read-only queries with both caching and simple error
    handling/retries. More complex database interactions can be performed by
    using the ``.session`` property to receive a regular SQLAlchemy Session.

    .. Important::
        `SQLAlchemy <https://pypi.org/project/SQLAlchemy/>`_ must be installed
        in your environment to use this connection. You must also install your
        driver, such as ``pyodbc`` or ``psycopg2``.

    .. |sqlalchemy.engine.URL.create()| replace:: ``sqlalchemy.engine.URL.create()``
    .. _sqlalchemy.engine.URL.create(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.engine.URL.create
    .. |sqlalchemy.engine.make_url()| replace:: ``sqlalchemy.engine.make_url()``
    .. _sqlalchemy.engine.make_url(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.engine.make_url
    .. |sqlalchemy.create_engine()| replace:: ``sqlalchemy.create_engine()``
    .. _sqlalchemy.create_engine(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.create_engine

    Examples
    --------
    **Example 1: Configuration with URL**

    You can configure your SQL connection using Streamlit's
    `Secrets management <https://docs.streamlit.io/develop/concepts/connections/secrets-management>`_.
    The following example specifies a SQL connection URL.

    ``.streamlit/secrets.toml``:

    >>> [connections.sql]
    >>> url = "xxx+xxx://xxx:xxx@xxx:xxx/xxx"

    Your app code:

    >>> import streamlit as st
    >>>
    >>> conn = st.connection("sql")
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    **Example 2: Configuration with dialect, host, and username**

    If you do not specify ``url``, you must at least specify ``dialect``,
    ``host``, and ``username`` instead. The following example also includes
    ``password``.

    ``.streamlit/secrets.toml``:

    >>> [connections.sql]
    >>> dialect = "xxx"
    >>> host = "xxx"
    >>> username = "xxx"
    >>> password = "xxx"

    Your app code:

    >>> import streamlit as st
    >>>
    >>> conn = st.connection("sql")
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    **Example 3: Configuration with keyword arguments**

    You can configure your SQL connection with keyword arguments (with or
    without ``secrets.toml``). For example, if you use Microsoft Entra ID with
    a Microsoft Azure SQL server, you can quickly set up a local connection for
    development using `interactive authentication
    <https://learn.microsoft.com/en-us/sql/connect/odbc/using-azure-active-directory?view=sql-server-ver16#new-andor-modified-dsn-and-connection-string-keywords>`_.

    This example requires the `Microsoft ODBC Driver for SQL Server
    <https://learn.microsoft.com/en-us/sql/connect/odbc/microsoft-odbc-driver-for-sql-server?view=sql-server-ver16>`_
    for *Windows* in addition to the ``sqlalchemy`` and ``pyodbc`` packages for
    Python.

    >>> import streamlit as st
    >>>
    >>> conn = st.connection(
    ...     "sql",
    ...     dialect="mssql",
    ...     driver="pyodbc",
    ...     host="xxx.database.windows.net",
    ...     database="xxx",
    ...     username="xxx",
    ...     query={
    ...         "driver": "ODBC Driver 18 for SQL Server",
    ...         "authentication": "ActiveDirectoryInteractive",
    ...         "encrypt": "yes",
    ...     },
    ... )
    >>>
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    """

    def _connect(self, autocommit: bool = False, **kwargs: Any) -> Engine:
        import sqlalchemy

        kwargs = deepcopy(kwargs)
        conn_param_kwargs = extract_from_dict(_ALL_CONNECTION_PARAMS, kwargs)
        conn_params = ChainMap(conn_param_kwargs, self._secrets.to_dict())

        if not len(conn_params):
            raise StreamlitAPIException(
                "Missing SQL DB connection configuration. "
                "Did you forget to set this in `secrets.toml` or as kwargs to `st.connection`?"
            )

        if "url" in conn_params:
            url = sqlalchemy.engine.make_url(conn_params["url"])
        else:
            for p in _REQUIRED_CONNECTION_PARAMS:
                if p not in conn_params:
                    raise StreamlitAPIException(f"Missing SQL DB connection param: {p}")

            drivername = conn_params["dialect"] + (
                f"+{conn_params['driver']}" if "driver" in conn_params else ""
            )

            url = sqlalchemy.engine.URL.create(
                drivername=drivername,
                username=conn_params["username"],
                password=conn_params.get("password"),
                host=conn_params["host"],
                port=int(conn_params["port"]) if "port" in conn_params else None,
                database=conn_params.get("database"),
                query=conn_params.get("query", {}),
            )

        create_engine_kwargs = ChainMap(
            kwargs, self._secrets.get("create_engine_kwargs", {})
        )
        eng = sqlalchemy.create_engine(url, **create_engine_kwargs)

        if autocommit:
            return cast("Engine", eng.execution_options(isolation_level="AUTOCOMMIT"))
        return cast("Engine", eng)

    def query(
        self,
        sql: str,
        *,  # keyword-only arguments:
        show_spinner: bool | str = "Running `sql.query(...)`.",
        ttl: float | int | timedelta | None = None,
        index_col: str | list[str] | None = None,
        chunksize: int | None = None,
        params: Any | None = None,
        **kwargs: Any,
    ) -> DataFrame:
        """Run a read-only query.

        This method implements query result caching and simple error
        handling/retries. The caching behavior is identical to that of using
        ``@st.cache_data``.

        .. note::
            Queries that are run without a specified ttl are cached indefinitely.

        All keyword arguments passed to this function are passed down to
        |pandas.read_sql|_, except ``ttl``.

        .. |pandas.read_sql| replace:: ``pandas.read_sql``
        .. _pandas.read_sql: https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html

        Parameters
        ----------
        sql : str
            The read-only SQL query to execute.
        show_spinner : boolean or string
            Enable the spinner. The default is to show a spinner when there is a
            "cache miss" and the cached resource is being created. If a string, the value
            of the show_spinner param will be used for the spinner text.
        ttl : float, int, timedelta or None
            The maximum number of seconds to keep results in the cache, or
            None if cached results should not expire. The default is None.
        index_col : str, list of str, or None
            Column(s) to set as index(MultiIndex). Default is None.
        chunksize : int or None
            If specified, return an iterator where chunksize is the number of
            rows to include in each chunk. Default is None.
        params : list, tuple, dict or None
            List of parameters to pass to the execute method. The syntax used to pass
            parameters is database driver dependent. Check your database driver
            documentation for which of the five syntax styles, described in `PEP 249
            paramstyle <https://peps.python.org/pep-0249/#paramstyle>`_, is supported.
            Default is None.
        **kwargs: dict
            Additional keyword arguments are passed to |pandas.read_sql|_.

            .. |pandas.read_sql| replace:: ``pandas.read_sql``
            .. _pandas.read_sql: https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html

        Returns
        -------
        pandas.DataFrame
            The result of running the query, formatted as a pandas DataFrame.

        Example
        -------
        >>> import streamlit as st
        >>>
        >>> conn = st.connection("sql")
        >>> df = conn.query(
        ...     "SELECT * FROM pet_owners WHERE owner = :owner",
        ...     ttl=3600,
        ...     params={"owner": "barbara"},
        ... )
        >>> st.dataframe(df)
        """

        from sqlalchemy import text
        from sqlalchemy.exc import DatabaseError, InternalError, OperationalError
        from tenacity import (
            retry,
            retry_if_exception_type,
            stop_after_attempt,
            wait_fixed,
        )

        @retry(
            after=lambda _: self.reset(),
            stop=stop_after_attempt(3),
            reraise=True,
            retry=retry_if_exception_type(
                (DatabaseError, InternalError, OperationalError)
            ),
            wait=wait_fixed(1),
        )
        def _query(
            sql: str,
            index_col: str | list[str] | None = None,
            chunksize: int | None = None,
            params: Any | None = None,
            **kwargs: Any,
        ) -> DataFrame:
            import pandas as pd

            instance = self._instance.connect()
            return cast(
                "DataFrame",
                pd.read_sql(
                    text(sql),
                    instance,
                    index_col=index_col,
                    chunksize=chunksize,
                    params=params,
                    **kwargs,
                ),
            )

        # We modify our helper function's `__qualname__` here to work around default
        # `@st.cache_data` behavior. Otherwise, `.query()` being called with different
        # `ttl` values will reset the cache with each call, and the query caches won't
        # be scoped by connection.
        ttl_str = str(  # Avoid adding extra `.` characters to `__qualname__`
            ttl
        ).replace(".", "_")
        _query.__qualname__ = f"{_query.__qualname__}_{self._connection_name}_{ttl_str}"
        _query = cache_data(
            show_spinner=show_spinner,
            ttl=ttl,
        )(_query)

        return _query(
            sql,
            index_col=index_col,
            chunksize=chunksize,
            params=params,
            **kwargs,
        )

    def connect(self) -> SQLAlchemyConnection:
        """Call ``.connect()`` on the underlying SQLAlchemy Engine, returning a new\
        connection object.

        Calling this method is equivalent to calling ``self._instance.connect()``.

        NOTE: This method should not be confused with the internal ``_connect`` method used
        to implement a Streamlit Connection.

        Returns
        -------
        sqlalchemy.engine.Connection
            A new SQLAlchemy connection object.
        """
        return self._instance.connect()

    @property
    def engine(self) -> Engine:
        """The underlying SQLAlchemy Engine.

        This is equivalent to accessing ``self._instance``.

        Returns
        -------
        sqlalchemy.engine.base.Engine
            The underlying SQLAlchemy Engine.
        """
        return self._instance

    @property
    def driver(self) -> str:
        """The name of the driver used by the underlying SQLAlchemy Engine.

        This is equivalent to accessing ``self._instance.driver``.

        Returns
        -------
        str
            The name of the driver. For example, ``"pyodbc"`` or ``"psycopg2"``.
        """
        return cast("str", self._instance.driver)

    @property
    def session(self) -> Session:
        """Return a SQLAlchemy Session.

        Users of this connection should use the contextmanager pattern for writes,
        transactions, and anything more complex than simple read queries.

        See the usage example below, which assumes we have a table ``numbers`` with a
        single integer column ``val``. The `SQLAlchemy
        <https://docs.sqlalchemy.org/en/20/orm/session_basics.html>`_ docs also contain
        much more information on the usage of sessions.

        Returns
        -------
        sqlalchemy.orm.Session
            A SQLAlchemy Session.

        Example
        -------
        >>> import streamlit as st
        >>> conn = st.connection("sql")
        >>> n = st.slider("Pick a number")
        >>> if st.button("Add the number!"):
        ...     with conn.session as session:
        ...         session.execute("INSERT INTO numbers (val) VALUES (:n);", {"n": n})
        ...         session.commit()
        """
        from sqlalchemy.orm import Session

        return Session(self._instance)