team-10/env/Lib/site-packages/streamlit/connections/sql_connection.py
2025-08-02 07:34:44 +02:00

426 lines
16 KiB
Python

# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE: We ignore all mypy import-not-found errors at the top level since
# this module is optional and the SQLAlchemy dependency is not installed
# by default.
# mypy: disable-error-code="import-not-found, redundant-cast"
from __future__ import annotations
from collections import ChainMap
from copy import deepcopy
from typing import TYPE_CHECKING, Any, cast
from streamlit.connections import BaseConnection
from streamlit.connections.util import extract_from_dict
from streamlit.errors import StreamlitAPIException
from streamlit.runtime.caching import cache_data
if TYPE_CHECKING:
from datetime import timedelta
from pandas import DataFrame
from sqlalchemy.engine import Connection as SQLAlchemyConnection
from sqlalchemy.engine.base import Engine
from sqlalchemy.orm import Session
_ALL_CONNECTION_PARAMS = {
"url",
"driver",
"dialect",
"username",
"password",
"host",
"port",
"database",
"query",
}
_REQUIRED_CONNECTION_PARAMS = {"dialect", "username", "host"}
class SQLConnection(BaseConnection["Engine"]):
    """A connection to a SQL database using a SQLAlchemy Engine.

    Initialize this connection object using ``st.connection("sql")`` or
    ``st.connection("<name>", type="sql")``. Connection parameters for a
    SQLConnection can be specified using ``secrets.toml`` and/or ``**kwargs``.
    Possible connection parameters include:

    - ``url`` or keyword arguments for |sqlalchemy.engine.URL.create()|_, except
      ``drivername``. Use ``dialect`` and ``driver`` instead of ``drivername``.
    - Keyword arguments for |sqlalchemy.create_engine()|_, including custom
      ``connect()`` arguments used by your specific ``dialect`` or ``driver``.
    - ``autocommit``. If this is ``False`` (default), the connection operates
      in manual commit (transactional) mode. If this is ``True``, the
      connection operates in autocommit (non-transactional) mode.

    If ``url`` exists as a connection parameter, Streamlit will pass it to
    |sqlalchemy.engine.make_url()|_. Otherwise, Streamlit requires (at a
    minimum) ``dialect``, ``username``, and ``host``. Streamlit will use
    ``dialect`` and ``driver`` (if defined) to derive ``drivername``, then pass
    the relevant connection parameters to ``sqlalchemy.engine.URL.create()``.

    In addition to the default keyword arguments for ``sqlalchemy.create_engine()``,
    your dialect may accept additional keyword arguments. For example, if you
    use ``dialect="snowflake"`` with `Snowflake SQLAlchemy
    <https://github.com/snowflakedb/snowflake-sqlalchemy#key-pair-authentication-support>`_,
    you can pass a value for ``private_key`` to use key-pair authentication. If
    you use ``dialect="bigquery"`` with `Google BigQuery
    <https://github.com/googleapis/python-bigquery-sqlalchemy#authentication>`_,
    you can pass a value for ``location``.

    SQLConnection provides the ``.query()`` convenience method, which can be
    used to run simple, read-only queries with both caching and simple error
    handling/retries. More complex database interactions can be performed by
    using the ``.session`` property to receive a regular SQLAlchemy Session.

    .. Important::
        `SQLAlchemy <https://pypi.org/project/SQLAlchemy/>`_ must be installed
        in your environment to use this connection. You must also install your
        driver, such as ``pyodbc`` or ``psycopg2``.

    .. |sqlalchemy.engine.URL.create()| replace:: ``sqlalchemy.engine.URL.create()``
    .. _sqlalchemy.engine.URL.create(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.engine.URL.create
    .. |sqlalchemy.engine.make_url()| replace:: ``sqlalchemy.engine.make_url()``
    .. _sqlalchemy.engine.make_url(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.engine.make_url
    .. |sqlalchemy.create_engine()| replace:: ``sqlalchemy.create_engine()``
    .. _sqlalchemy.create_engine(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.create_engine

    Examples
    --------

    **Example 1: Configuration with URL**

    You can configure your SQL connection using Streamlit's
    `Secrets management <https://docs.streamlit.io/develop/concepts/connections/secrets-management>`_.
    The following example specifies a SQL connection URL.

    ``.streamlit/secrets.toml``:

    >>> [connections.sql]
    >>> url = "xxx+xxx://xxx:xxx@xxx:xxx/xxx"

    Your app code:

    >>> import streamlit as st
    >>>
    >>> conn = st.connection("sql")
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    **Example 2: Configuration with dialect, host, and username**

    If you do not specify ``url``, you must at least specify ``dialect``,
    ``host``, and ``username`` instead. The following example also includes
    ``password``.

    ``.streamlit/secrets.toml``:

    >>> [connections.sql]
    >>> dialect = "xxx"
    >>> host = "xxx"
    >>> username = "xxx"
    >>> password = "xxx"

    Your app code:

    >>> import streamlit as st
    >>>
    >>> conn = st.connection("sql")
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    **Example 3: Configuration with keyword arguments**

    You can configure your SQL connection with keyword arguments (with or
    without ``secrets.toml``). For example, if you use Microsoft Entra ID with
    a Microsoft Azure SQL server, you can quickly set up a local connection for
    development using `interactive authentication
    <https://learn.microsoft.com/en-us/sql/connect/odbc/using-azure-active-directory?view=sql-server-ver16#new-andor-modified-dsn-and-connection-string-keywords>`_.

    This example requires the `Microsoft ODBC Driver for SQL Server
    <https://learn.microsoft.com/en-us/sql/connect/odbc/microsoft-odbc-driver-for-sql-server?view=sql-server-ver16>`_
    for *Windows* in addition to the ``sqlalchemy`` and ``pyodbc`` packages for
    Python.

    >>> import streamlit as st
    >>>
    >>> conn = st.connection(
    ...     "sql",
    ...     dialect="mssql",
    ...     driver="pyodbc",
    ...     host="xxx.database.windows.net",
    ...     database="xxx",
    ...     username="xxx",
    ...     query={
    ...         "driver": "ODBC Driver 18 for SQL Server",
    ...         "authentication": "ActiveDirectoryInteractive",
    ...         "encrypt": "yes",
    ...     },
    ... )
    >>>
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    """

    def _connect(self, autocommit: bool = False, **kwargs: Any) -> Engine:
        """Create the SQLAlchemy Engine that backs this connection.

        URL-related parameters are looked up first in ``kwargs`` and then in
        this connection's secrets. Keyword arguments that are not URL-related
        are forwarded to ``sqlalchemy.create_engine()``, layered over the
        optional ``create_engine_kwargs`` entry of the secrets.

        Parameters
        ----------
        autocommit : bool
            If True, return the engine with its ``isolation_level`` execution
            option set to ``"AUTOCOMMIT"``. The default is False.
        **kwargs : dict
            Connection parameters (see ``_ALL_CONNECTION_PARAMS``) and/or
            keyword arguments for ``sqlalchemy.create_engine()``.

        Returns
        -------
        sqlalchemy.engine.base.Engine
            The configured engine.

        Raises
        ------
        StreamlitAPIException
            If no connection configuration is found at all, or if ``url`` is
            absent and one of ``dialect``, ``username`` or ``host`` is missing.
        """
        import sqlalchemy

        # Work on a copy so the caller's kwargs are never modified.
        kwargs = deepcopy(kwargs)
        # NOTE(review): extract_from_dict presumably removes the extracted keys
        # from ``kwargs`` so only create_engine() arguments remain - confirm.
        conn_param_kwargs = extract_from_dict(_ALL_CONNECTION_PARAMS, kwargs)
        # Explicit kwargs take precedence over values from secrets.
        conn_params = ChainMap(conn_param_kwargs, self._secrets.to_dict())

        if not conn_params:
            raise StreamlitAPIException(
                "Missing SQL DB connection configuration. "
                "Did you forget to set this in `secrets.toml` or as kwargs to `st.connection`?"
            )

        if "url" in conn_params:
            url = sqlalchemy.engine.make_url(conn_params["url"])
        else:
            # Sort so that the reported parameter does not depend on set
            # iteration order when several required parameters are missing.
            missing = sorted(
                p for p in _REQUIRED_CONNECTION_PARAMS if p not in conn_params
            )
            if missing:
                raise StreamlitAPIException(
                    f"Missing SQL DB connection param: {missing[0]}"
                )

            drivername = conn_params["dialect"] + (
                f"+{conn_params['driver']}" if "driver" in conn_params else ""
            )

            url = sqlalchemy.engine.URL.create(
                drivername=drivername,
                username=conn_params["username"],
                password=conn_params.get("password"),
                host=conn_params["host"],
                port=int(conn_params["port"]) if "port" in conn_params else None,
                database=conn_params.get("database"),
                query=conn_params.get("query", {}),
            )

        # Remaining kwargs override ``create_engine_kwargs`` from secrets.
        create_engine_kwargs = ChainMap(
            kwargs, self._secrets.get("create_engine_kwargs", {})
        )
        eng = sqlalchemy.create_engine(url, **create_engine_kwargs)

        if autocommit:
            return cast("Engine", eng.execution_options(isolation_level="AUTOCOMMIT"))
        return cast("Engine", eng)

    def query(
        self,
        sql: str,
        *,  # keyword-only arguments:
        show_spinner: bool | str = "Running `sql.query(...)`.",
        ttl: float | int | timedelta | None = None,
        index_col: str | list[str] | None = None,
        chunksize: int | None = None,
        params: Any | None = None,
        **kwargs: Any,
    ) -> DataFrame:
        """Run a read-only query.

        This method implements query result caching and simple error
        handling/retries. The caching behavior is identical to that of using
        ``@st.cache_data``.

        .. note::
            Queries that are run without a specified ttl are cached indefinitely.

        All keyword arguments passed to this function are passed down to
        |pandas.read_sql|_, except ``show_spinner`` and ``ttl``, which
        configure the cache.

        .. |pandas.read_sql| replace:: ``pandas.read_sql``
        .. _pandas.read_sql: https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html

        Parameters
        ----------
        sql : str
            The read-only SQL query to execute.
        show_spinner : boolean or string
            Enable the spinner. The default is to show a spinner when there is a
            "cache miss" and the cached resource is being created. If a string, the value
            of the show_spinner param will be used for the spinner text.
        ttl : float, int, timedelta or None
            The maximum number of seconds to keep results in the cache, or
            None if cached results should not expire. The default is None.
        index_col : str, list of str, or None
            Column(s) to set as index(MultiIndex). Default is None.
        chunksize : int or None
            If specified, return an iterator where chunksize is the number of
            rows to include in each chunk. Default is None.
        params : list, tuple, dict or None
            List of parameters to pass to the execute method. The syntax used to pass
            parameters is database driver dependent. Check your database driver
            documentation for which of the five syntax styles, described in `PEP 249
            paramstyle <https://peps.python.org/pep-0249/#paramstyle>`_, is supported.
            Default is None.
        **kwargs: dict
            Additional keyword arguments are passed to |pandas.read_sql|_.

        Returns
        -------
        pandas.DataFrame
            The result of running the query, formatted as a pandas DataFrame.

        Example
        -------
        >>> import streamlit as st
        >>>
        >>> conn = st.connection("sql")
        >>> df = conn.query(
        ...     "SELECT * FROM pet_owners WHERE owner = :owner",
        ...     ttl=3600,
        ...     params={"owner": "barbara"},
        ... )
        >>> st.dataframe(df)
        """
        from sqlalchemy import text
        from sqlalchemy.exc import DatabaseError, InternalError, OperationalError
        from tenacity import (
            retry,
            retry_if_exception_type,
            stop_after_attempt,
            wait_fixed,
        )

        # Up to three attempts, one second apart; the connection is reset
        # after every attempt so that a retry starts from a fresh engine.
        @retry(
            after=lambda _: self.reset(),
            stop=stop_after_attempt(3),
            reraise=True,
            retry=retry_if_exception_type(
                (DatabaseError, InternalError, OperationalError)
            ),
            wait=wait_fixed(1),
        )
        def _query(
            sql: str,
            index_col: str | list[str] | None = None,
            chunksize: int | None = None,
            params: Any | None = None,
            **kwargs: Any,
        ) -> DataFrame:
            import pandas as pd

            db_conn = self._instance.connect()

            def _read() -> DataFrame:
                return cast(
                    "DataFrame",
                    pd.read_sql(
                        text(sql),
                        db_conn,
                        index_col=index_col,
                        chunksize=chunksize,
                        params=params,
                        **kwargs,
                    ),
                )

            if chunksize is not None:
                # pandas hands back a lazy iterator that still reads from the
                # connection, so it cannot be closed here.
                return _read()

            # Close the connection as soon as the result is materialized
            # instead of leaving it for the garbage collector to reclaim.
            with db_conn:
                return _read()

        # We modify our helper function's `__qualname__` here to work around default
        # `@st.cache_data` behavior. Otherwise, `.query()` being called with different
        # `ttl` values will reset the cache with each call, and the query caches won't
        # be scoped by connection.
        ttl_str = str(  # Avoid adding extra `.` characters to `__qualname__`
            ttl
        ).replace(".", "_")
        _query.__qualname__ = f"{_query.__qualname__}_{self._connection_name}_{ttl_str}"
        _query = cache_data(
            show_spinner=show_spinner,
            ttl=ttl,
        )(_query)

        return _query(
            sql,
            index_col=index_col,
            chunksize=chunksize,
            params=params,
            **kwargs,
        )

    def connect(self) -> SQLAlchemyConnection:
        """Return a new connection object from the underlying SQLAlchemy Engine.

        Calling this method is equivalent to calling ``self._instance.connect()``.

        NOTE: This method should not be confused with the internal ``_connect`` method used
        to implement a Streamlit Connection.

        Returns
        -------
        sqlalchemy.engine.Connection
            A new SQLAlchemy connection object.
        """
        return self._instance.connect()

    @property
    def engine(self) -> Engine:
        """The underlying SQLAlchemy Engine.

        This is equivalent to accessing ``self._instance``.

        Returns
        -------
        sqlalchemy.engine.base.Engine
            The underlying SQLAlchemy Engine.
        """
        return self._instance

    @property
    def driver(self) -> str:
        """The name of the driver used by the underlying SQLAlchemy Engine.

        This is equivalent to accessing ``self._instance.driver``.

        Returns
        -------
        str
            The name of the driver. For example, ``"pyodbc"`` or ``"psycopg2"``.
        """
        return cast("str", self._instance.driver)

    @property
    def session(self) -> Session:
        """Return a SQLAlchemy Session.

        Users of this connection should use the contextmanager pattern for writes,
        transactions, and anything more complex than simple read queries.

        See the usage example below, which assumes we have a table ``numbers`` with a
        single integer column ``val``. The `SQLAlchemy
        <https://docs.sqlalchemy.org/en/20/orm/session_basics.html>`_ docs also contain
        much more information on the usage of sessions.

        Returns
        -------
        sqlalchemy.orm.Session
            A SQLAlchemy Session.

        Example
        -------
        >>> import streamlit as st
        >>> from sqlalchemy import text
        >>>
        >>> conn = st.connection("sql")
        >>> n = st.slider("Pick a number")
        >>> if st.button("Add the number!"):
        ...     with conn.session as session:
        ...         session.execute(
        ...             text("INSERT INTO numbers (val) VALUES (:n);"), {"n": n}
        ...         )
        ...         session.commit()
        """
        from sqlalchemy.orm import Session

        # A new Session bound to the engine is created on every access.
        return Session(self._instance)