# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE: We ignore all mypy import-not-found errors as top-level since
# this module is optional and the SQLAlchemy dependency is not installed
# by default.
# mypy: disable-error-code="import-not-found, redundant-cast"

from __future__ import annotations

from collections import ChainMap
from copy import deepcopy
from typing import TYPE_CHECKING, Any, cast

from streamlit.connections import BaseConnection
from streamlit.connections.util import extract_from_dict
from streamlit.errors import StreamlitAPIException
from streamlit.runtime.caching import cache_data

if TYPE_CHECKING:
    from datetime import timedelta

    from pandas import DataFrame
    from sqlalchemy.engine import Connection as SQLAlchemyConnection
    from sqlalchemy.engine.base import Engine
    from sqlalchemy.orm import Session


# Every key that is interpreted as a URL-building connection parameter. Any
# other keyword argument is forwarded to ``sqlalchemy.create_engine()``.
_ALL_CONNECTION_PARAMS = {
    "url",
    "driver",
    "dialect",
    "username",
    "password",
    "host",
    "port",
    "database",
    "query",
}

# Parameters that must be present when no ``url`` is supplied.
_REQUIRED_CONNECTION_PARAMS = {"dialect", "username", "host"}


class SQLConnection(BaseConnection["Engine"]):
    """A connection to a SQL database using a SQLAlchemy Engine.

    Initialize this connection object using ``st.connection("sql")`` or
    ``st.connection("<name>", type="sql")``. Connection parameters for a
    SQLConnection can be specified using ``secrets.toml`` and/or ``**kwargs``.
    Possible connection parameters include:

    - ``url`` or keyword arguments for |sqlalchemy.engine.URL.create()|_, except
      ``drivername``. Use ``dialect`` and ``driver`` instead of ``drivername``.
    - Keyword arguments for |sqlalchemy.create_engine()|_, including custom
      ``connect()`` arguments used by your specific ``dialect`` or ``driver``.
    - ``autocommit``. If this is ``False`` (default), the connection operates
      in manual commit (transactional) mode. If this is ``True``, the
      connection operates in autocommit (non-transactional) mode.

    If ``url`` exists as a connection parameter, Streamlit will pass it to
    ``sqlalchemy.engine.make_url()``. Otherwise, Streamlit requires (at a
    minimum) ``dialect``, ``username``, and ``host``. Streamlit will use
    ``dialect`` and ``driver`` (if defined) to derive ``drivername``, then pass
    the relevant connection parameters to ``sqlalchemy.engine.URL.create()``.

    In addition to the default keyword arguments for ``sqlalchemy.create_engine()``,
    your dialect may accept additional keyword arguments. For example, if you
    use ``dialect="snowflake"`` with `Snowflake SQLAlchemy
    <https://pypi.org/project/snowflake-sqlalchemy/>`_,
    you can pass a value for ``private_key`` to use key-pair authentication. If
    you use ``dialect="bigquery"`` with `Google BigQuery
    <https://github.com/googleapis/python-bigquery-sqlalchemy>`_,
    you can pass a value for ``location``.

    SQLConnection provides the ``.query()`` convenience method, which can be
    used to run simple, read-only queries with both caching and simple error
    handling/retries. More complex database interactions can be performed by
    using the ``.session`` property to receive a regular SQLAlchemy Session.

    .. Important::
        `SQLAlchemy <https://pypi.org/project/SQLAlchemy/>`_ must be installed
        in your environment to use this connection. You must also install your
        driver, such as ``pyodbc`` or ``psycopg2``.

    .. |sqlalchemy.engine.URL.create()| replace:: ``sqlalchemy.engine.URL.create()``
    .. _sqlalchemy.engine.URL.create(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.engine.URL.create
    .. |sqlalchemy.engine.make_url()| replace:: ``sqlalchemy.engine.make_url()``
    .. _sqlalchemy.engine.make_url(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.engine.make_url
    .. |sqlalchemy.create_engine()| replace:: ``sqlalchemy.create_engine()``
    .. _sqlalchemy.create_engine(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.create_engine

    Examples
    --------

    **Example 1: Configuration with URL**

    You can configure your SQL connection using Streamlit's
    `Secrets management <https://docs.streamlit.io/develop/concepts/connections/secrets-management>`_.
    The following example specifies a SQL connection URL.

    ``.streamlit/secrets.toml``:

    >>> [connections.sql]
    >>> url = "xxx+xxx://xxx:xxx@xxx:xxx/xxx"

    Your app code:

    >>> import streamlit as st
    >>>
    >>> conn = st.connection("sql")
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    **Example 2: Configuration with dialect, host, and username**

    If you do not specify ``url``, you must at least specify ``dialect``,
    ``host``, and ``username`` instead. The following example also includes
    ``password``.

    ``.streamlit/secrets.toml``:

    >>> [connections.sql]
    >>> dialect = "xxx"
    >>> host = "xxx"
    >>> username = "xxx"
    >>> password = "xxx"

    Your app code:

    >>> import streamlit as st
    >>>
    >>> conn = st.connection("sql")
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    **Example 3: Configuration with keyword arguments**

    You can configure your SQL connection with keyword arguments (with or
    without ``secrets.toml``). For example, if you use Microsoft Entra ID with
    a Microsoft Azure SQL server, you can quickly set up a local connection for
    development using `interactive authentication
    <https://learn.microsoft.com/en-us/sql/connect/odbc/using-azure-active-directory>`_.

    This example requires the `Microsoft ODBC Driver for SQL Server
    <https://learn.microsoft.com/en-us/sql/connect/odbc/microsoft-odbc-driver-for-sql-server>`_
    for *Windows* in addition to the ``sqlalchemy`` and ``pyodbc`` packages for
    Python.

    >>> import streamlit as st
    >>>
    >>> conn = st.connection(
    ...     "sql",
    ...     dialect="mssql",
    ...     driver="pyodbc",
    ...     host="xxx.database.windows.net",
    ...     database="xxx",
    ...     username="xxx",
    ...     query={
    ...         "driver": "ODBC Driver 18 for SQL Server",
    ...         "authentication": "ActiveDirectoryInteractive",
    ...         "encrypt": "yes",
    ...     },
    ... )
    >>>
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    """

    def _connect(self, autocommit: bool = False, **kwargs: Any) -> Engine:
        """Build the SQLAlchemy Engine backing this connection.

        Connection parameters (the keys in ``_ALL_CONNECTION_PARAMS``) are
        looked up first in ``kwargs`` and then in this connection's secrets.
        All remaining keyword arguments are forwarded to
        ``sqlalchemy.create_engine()``, layered on top of the
        ``create_engine_kwargs`` entry of the secrets (if any).

        Parameters
        ----------
        autocommit : bool
            If True, the returned Engine uses the ``AUTOCOMMIT`` isolation
            level. Default is False.
        **kwargs : dict
            Connection parameters and/or ``sqlalchemy.create_engine()``
            keyword arguments.

        Returns
        -------
        sqlalchemy.engine.base.Engine
            The configured Engine.

        Raises
        ------
        StreamlitAPIException
            If no connection configuration is available at all, or if ``url``
            is absent and one of ``dialect``, ``host`` or ``username`` is
            missing.
        """
        import sqlalchemy

        # Work on a private copy: ``extract_from_dict`` removes the connection
        # parameters from the dict it is given.
        kwargs = deepcopy(kwargs)
        conn_param_kwargs = extract_from_dict(_ALL_CONNECTION_PARAMS, kwargs)
        # Explicit keyword arguments take precedence over secrets.
        conn_params = ChainMap(conn_param_kwargs, self._secrets.to_dict())

        if not conn_params:
            raise StreamlitAPIException(
                "Missing SQL DB connection configuration. "
                "Did you forget to set this in `secrets.toml` or as kwargs to `st.connection`?"
            )

        if "url" in conn_params:
            url = sqlalchemy.engine.make_url(conn_params["url"])
        else:
            # Sorted so that the reported parameter is deterministic when more
            # than one required parameter is missing.
            for p in sorted(_REQUIRED_CONNECTION_PARAMS):
                if p not in conn_params:
                    raise StreamlitAPIException(f"Missing SQL DB connection param: {p}")

            drivername = conn_params["dialect"] + (
                f"+{conn_params['driver']}" if "driver" in conn_params else ""
            )

            url = sqlalchemy.engine.URL.create(
                drivername=drivername,
                username=conn_params["username"],
                password=conn_params.get("password"),
                host=conn_params["host"],
                port=int(conn_params["port"]) if "port" in conn_params else None,
                database=conn_params.get("database"),
                query=conn_params.get("query", {}),
            )

        create_engine_kwargs = ChainMap(
            kwargs, self._secrets.get("create_engine_kwargs", {})
        )
        eng = sqlalchemy.create_engine(url, **create_engine_kwargs)

        if autocommit:
            return cast("Engine", eng.execution_options(isolation_level="AUTOCOMMIT"))
        return cast("Engine", eng)

    def query(
        self,
        sql: str,
        *,  # keyword-only arguments:
        show_spinner: bool | str = "Running `sql.query(...)`.",
        ttl: float | int | timedelta | None = None,
        index_col: str | list[str] | None = None,
        chunksize: int | None = None,
        params: Any | None = None,
        **kwargs: Any,
    ) -> DataFrame:
        """Run a read-only query.

        This method implements query result caching and simple error
        handling/retries. The caching behavior is identical to that of using
        ``@st.cache_data``.

        .. note::
            Queries that are run without a specified ttl are cached indefinitely.

        All keyword arguments passed to this function are passed down to
        |pandas.read_sql|_, except ``show_spinner`` and ``ttl``.

        Parameters
        ----------
        sql : str
            The read-only SQL query to execute.
        show_spinner : boolean or string
            Enable the spinner. The default is to show a spinner when there is a
            "cache miss" and the cached resource is being created. If a string, the value
            of the show_spinner param will be used for the spinner text.
        ttl : float, int, timedelta or None
            The maximum number of seconds to keep results in the cache, or
            None if cached results should not expire. The default is None.
        index_col : str, list of str, or None
            Column(s) to set as index(MultiIndex). Default is None.
        chunksize : int or None
            If specified, return an iterator where chunksize is the number of
            rows to include in each chunk. Default is None.
        params : list, tuple, dict or None
            List of parameters to pass to the execute method. The syntax used to pass
            parameters is database driver dependent. Check your database driver
            documentation for which of the five syntax styles, described in `PEP 249
            paramstyle <https://peps.python.org/pep-0249/#paramstyle>`_, is supported.
            Default is None.
        **kwargs: dict
            Additional keyword arguments are passed to |pandas.read_sql|_.

        .. |pandas.read_sql| replace:: ``pandas.read_sql``
        .. _pandas.read_sql: https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html

        Returns
        -------
        pandas.DataFrame
            The result of running the query, formatted as a pandas DataFrame.

        Example
        -------
        >>> import streamlit as st
        >>>
        >>> conn = st.connection("sql")
        >>> df = conn.query(
        ...     "SELECT * FROM pet_owners WHERE owner = :owner",
        ...     ttl=3600,
        ...     params={"owner": "barbara"},
        ... )
        >>> st.dataframe(df)

        """

        from sqlalchemy import text
        from sqlalchemy.exc import DatabaseError, InternalError, OperationalError
        from tenacity import (
            retry,
            retry_if_exception_type,
            stop_after_attempt,
            wait_fixed,
        )

        # Up to 3 attempts, 1 second apart; the connection is reset after each
        # attempt so that a retry starts from a fresh Engine.
        @retry(
            after=lambda _: self.reset(),
            stop=stop_after_attempt(3),
            reraise=True,
            retry=retry_if_exception_type(
                (DatabaseError, InternalError, OperationalError)
            ),
            wait=wait_fixed(1),
        )
        def _query(
            sql: str,
            index_col: str | list[str] | None = None,
            chunksize: int | None = None,
            params: Any | None = None,
            **kwargs: Any,
        ) -> DataFrame:
            import pandas as pd

            connection = self._instance.connect()
            # Release the connection explicitly instead of relying on garbage
            # collection to hand it back to the pool.
            keep_open = False
            try:
                result = pd.read_sql(
                    text(sql),
                    connection,
                    index_col=index_col,
                    chunksize=chunksize,
                    params=params,
                    **kwargs,
                )
                # With ``chunksize`` pandas returns a lazy iterator that still
                # reads from the connection, so it must outlive this call.
                keep_open = chunksize is not None
                return cast("DataFrame", result)
            finally:
                if not keep_open:
                    connection.close()

        # We modify our helper function's `__qualname__` here to work around default
        # `@st.cache_data` behavior. Otherwise, `.query()` being called with different
        # `ttl` values will reset the cache with each call, and the query caches won't
        # be scoped by connection.
        # Avoid adding extra `.` characters to `__qualname__`.
        ttl_str = str(ttl).replace(".", "_")
        _query.__qualname__ = f"{_query.__qualname__}_{self._connection_name}_{ttl_str}"
        _query = cache_data(
            show_spinner=show_spinner,
            ttl=ttl,
        )(_query)

        return _query(
            sql,
            index_col=index_col,
            chunksize=chunksize,
            params=params,
            **kwargs,
        )

    def connect(self) -> SQLAlchemyConnection:
        """Call ``.connect()`` on the underlying SQLAlchemy Engine.

        This returns a new connection object. Calling this method is
        equivalent to calling ``self._instance.connect()``.

        NOTE: This method should not be confused with the internal ``_connect`` method
        used to implement a Streamlit Connection.

        Returns
        -------
        sqlalchemy.engine.Connection
            A new SQLAlchemy connection object.
        """
        return self._instance.connect()

    @property
    def engine(self) -> Engine:
        """The underlying SQLAlchemy Engine.

        This is equivalent to accessing ``self._instance``.

        Returns
        -------
        sqlalchemy.engine.base.Engine
            The underlying SQLAlchemy Engine.
        """
        return self._instance

    @property
    def driver(self) -> str:
        """The name of the driver used by the underlying SQLAlchemy Engine.

        This is equivalent to accessing ``self._instance.driver``.

        Returns
        -------
        str
            The name of the driver. For example, ``"pyodbc"`` or ``"psycopg2"``.
        """
        return cast("str", self._instance.driver)

    @property
    def session(self) -> Session:
        """Return a SQLAlchemy Session.

        Users of this connection should use the contextmanager pattern for writes,
        transactions, and anything more complex than simple read queries.

        See the usage example below, which assumes we have a table ``numbers`` with a
        single integer column ``val``. The `SQLAlchemy
        <https://docs.sqlalchemy.org/en/20/orm/session_basics.html>`_ docs also contain
        much more information on the usage of sessions.

        Returns
        -------
        sqlalchemy.orm.Session
            A SQLAlchemy Session.

        Example
        -------
        >>> import streamlit as st
        >>> from sqlalchemy import text
        >>>
        >>> conn = st.connection("sql")
        >>> n = st.slider("Pick a number")
        >>> if st.button("Add the number!"):
        ...     with conn.session as session:
        ...         session.execute(
        ...             text("INSERT INTO numbers (val) VALUES (:n);"), {"n": n}
        ...         )
        ...         session.commit()

        """
        from sqlalchemy.orm import Session

        return Session(self._instance)