# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import json
from dataclasses import dataclass
from decimal import Decimal
from typing import (
    TYPE_CHECKING,
    Any,
    Final,
    Literal,
    TypedDict,
    TypeVar,
    Union,
    cast,
    overload,
)

from typing_extensions import TypeAlias

from streamlit import dataframe_util
from streamlit import logger as _logger
from streamlit.elements.lib.column_config_utils import (
    INDEX_IDENTIFIER,
    ColumnConfigMapping,
    ColumnConfigMappingInput,
    ColumnDataKind,
    DataframeSchema,
    apply_data_specific_configs,
    determine_dataframe_schema,
    is_type_compatible,
    marshall_column_config,
    process_config_mapping,
    update_column_config,
)
from streamlit.elements.lib.form_utils import current_form_id
from streamlit.elements.lib.pandas_styler_utils import marshall_styler
from streamlit.elements.lib.policies import check_widget_policies
from streamlit.elements.lib.utils import Key, compute_and_register_element_id, to_key
from streamlit.errors import StreamlitAPIException
from streamlit.proto.Arrow_pb2 import Arrow as ArrowProto
from streamlit.runtime.metrics_util import gather_metrics
from streamlit.runtime.scriptrunner_utils.script_run_context import get_script_run_ctx
from streamlit.runtime.state import (
    WidgetArgs,
    WidgetCallback,
    WidgetKwargs,
    register_widget,
)
from streamlit.type_util import is_type
from streamlit.util import calc_md5

if TYPE_CHECKING:
    from collections.abc import Iterable, Mapping

    import numpy as np
    import pandas as pd
    import pyarrow as pa
    from pandas.io.formats.style import Styler

    from streamlit.delta_generator import DeltaGenerator

_LOGGER: Final = _logger.get_logger(__name__)

# All formats that support direct editing, meaning that these
# formats will be returned with the same type when used with data_editor.
EditableData = TypeVar(
    "EditableData",
    bound=Union[
        dataframe_util.DataFrameGenericAlias[Any],  # covers DataFrame and Series
        tuple[Any],
        list[Any],
        set[Any],
        dict[str, Any],
        # TODO(lukasmasuch): Add support for np.ndarray;
        # the plain np.ndarray type is not usable here.
        # NDArray[Any] works, but is only available in numpy>1.20.
        # TODO(lukasmasuch): Add support for pa.Table typing;
        # pa.Table does not work since it is a C-based class resulting in Any.
    ],
)

# All data types supported by the data editor.
DataTypes: TypeAlias = Union[
    "pd.DataFrame",
    "pd.Series",
    "pd.Index",
    "Styler",
    "pa.Table",
    "np.ndarray[Any, np.dtype[np.float64]]",
    tuple[Any],
    list[Any],
    set[Any],
    dict[str, Any],
]


class EditingState(TypedDict, total=False):
    """
    A dictionary representing the current state of the data editor.

    Attributes
    ----------
    edited_rows : dict[int, dict[str, str | int | float | bool | None]]
        A hierarchical mapping of edited cells based on:
        row position -> column name -> value.
    added_rows : list[dict[str, str | int | float | bool | None]]
        A list of added rows, where each row is a mapping from column name to
        the cell value.
    deleted_rows : list[int]
        A list of deleted rows, where each entry is the numerical position of
        a deleted row.
    """

    edited_rows: dict[int, dict[str, str | int | float | bool | None]]
    added_rows: list[dict[str, str | int | float | bool | None]]
    deleted_rows: list[int]
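
# A minimal sketch of an editing state payload (hypothetical values), as it
# looks after deserialization from the frontend's JSON string:
#
#   {
#       "edited_rows": {0: {"name": "Ada"}},
#       "added_rows": [{"name": "Grace", "rating": 5}],
#       "deleted_rows": [2],
#   }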


@dataclass
class DataEditorSerde:
    """DataEditorSerde is used to serialize and deserialize the data editor state."""

    def deserialize(self, ui_value: str | None) -> EditingState:
        data_editor_state: EditingState = (
            {
                "edited_rows": {},
                "added_rows": [],
                "deleted_rows": [],
            }
            if ui_value is None
            else json.loads(ui_value)
        )

        # Make sure that all editing state keys are present:
        if "edited_rows" not in data_editor_state:
            data_editor_state["edited_rows"] = {}
        if "deleted_rows" not in data_editor_state:
            data_editor_state["deleted_rows"] = []
        if "added_rows" not in data_editor_state:
            data_editor_state["added_rows"] = []

        # Convert the keys (numerical row positions) to integers.
        # The keys are strings because they are serialized to JSON.
        data_editor_state["edited_rows"] = {
            int(k): v for k, v in data_editor_state["edited_rows"].items()
        }
        return data_editor_state

    def serialize(self, editing_state: EditingState) -> str:
        return json.dumps(editing_state, default=str)
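
# A minimal sketch of the serde round trip (hypothetical values). JSON object
# keys always arrive as strings and are converted back to integers:
#
#   serde = DataEditorSerde()
#   state = serde.deserialize('{"edited_rows": {"0": {"col": 1}}}')
#   # -> {"edited_rows": {0: {"col": 1}}, "added_rows": [], "deleted_rows": []}
#   serde.serialize(state)  # -> a JSON string again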


def _parse_value(
    value: str | int | float | bool | None,
    column_data_kind: ColumnDataKind,
) -> Any:
    """Convert a value to the correct type.

    Parameters
    ----------
    value : str | int | float | bool | None
        The value to convert.
    column_data_kind : ColumnDataKind
        The determined data kind of the column. The column data kind refers to the
        shared data type of the values in the column (e.g. int, float, str).

    Returns
    -------
    The converted value.
    """
    if value is None:
        return None

    import pandas as pd

    try:
        if column_data_kind == ColumnDataKind.STRING:
            return str(value)

        if column_data_kind == ColumnDataKind.INTEGER:
            return int(value)

        if column_data_kind == ColumnDataKind.FLOAT:
            return float(value)

        if column_data_kind == ColumnDataKind.BOOLEAN:
            return bool(value)

        if column_data_kind == ColumnDataKind.DECIMAL:
            # Decimal theoretically can also be initialized via number values.
            # However, using number values here seems to cause issues with Arrow
            # serialization, once you try to render the returned dataframe.
            return Decimal(str(value))

        if column_data_kind == ColumnDataKind.TIMEDELTA:
            return pd.Timedelta(value)

        if column_data_kind in [
            ColumnDataKind.DATETIME,
            ColumnDataKind.DATE,
            ColumnDataKind.TIME,
        ]:
            datetime_value = pd.Timestamp(value)

            if datetime_value is pd.NaT:
                return None

            if column_data_kind == ColumnDataKind.DATETIME:
                return datetime_value

            if column_data_kind == ColumnDataKind.DATE:
                return datetime_value.date()

            if column_data_kind == ColumnDataKind.TIME:
                return datetime_value.time()

    except (ValueError, pd.errors.ParserError) as ex:
        _LOGGER.warning(
            "Failed to parse value %s as %s.",
            value,
            column_data_kind,
            exc_info=ex,
        )
        return None

    return value
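
# A few hedged examples (assuming pandas is installed and the inputs are
# well-formed):
#
#   _parse_value("3.5", ColumnDataKind.FLOAT)        # -> 3.5
#   _parse_value("2024-01-01", ColumnDataKind.DATE)  # -> datetime.date(2024, 1, 1)
#   _parse_value("not-a-date", ColumnDataKind.DATE)  # -> None (a warning is logged)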


def _apply_cell_edits(
    df: pd.DataFrame,
    edited_rows: Mapping[int, Mapping[str, str | int | float | bool | None]],
    dataframe_schema: DataframeSchema,
) -> None:
    """Apply cell edits to the provided dataframe (inplace).

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to apply the cell edits to.
    edited_rows : Mapping[int, Mapping[str, str | int | float | bool | None]]
        A hierarchical mapping based on row position -> column name -> value.
    dataframe_schema : DataframeSchema
        The schema of the dataframe.
    """
    for row_id, row_changes in edited_rows.items():
        row_pos = int(row_id)
        for col_name, value in row_changes.items():
            if col_name == INDEX_IDENTIFIER:
                # The edited cell is part of the index.
                # TODO(lukasmasuch): To support multi-index in the future:
                # use a tuple of values here instead of a single value
                old_idx_value = df.index[row_pos]
                new_idx_value = _parse_value(
                    value, dataframe_schema[INDEX_IDENTIFIER]
                )
                df.rename(
                    index={old_idx_value: new_idx_value},
                    inplace=True,  # noqa: PD002
                )
            else:
                col_pos = df.columns.get_loc(col_name)
                df.iloc[row_pos, col_pos] = _parse_value(
                    value, dataframe_schema[col_name]
                )
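
# Sketch (hypothetical values): edited_rows={1: {"rating": "5"}} overwrites the
# cell at row position 1 in column "rating" with the value parsed according to
# the column's data kind; an edit on "_index" renames the index label instead.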


def _parse_added_row(
    df: pd.DataFrame,
    added_row: dict[str, Any],
    dataframe_schema: DataframeSchema,
) -> tuple[Any, list[Any]]:
    """Parse the added row into an optional index value and a list of row values."""
    index_value = None
    new_row: list[Any] = [None for _ in range(df.shape[1])]

    for col_name, value in added_row.items():
        if col_name == INDEX_IDENTIFIER:
            # TODO(lukasmasuch): To support multi-index in the future:
            # use a tuple of values here instead of a single value
            index_value = _parse_value(value, dataframe_schema[INDEX_IDENTIFIER])
        else:
            col_pos = cast("int", df.columns.get_loc(col_name))
            new_row[col_pos] = _parse_value(value, dataframe_schema[col_name])

    return index_value, new_row
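
# Sketch (hypothetical values, assuming column "b" has an integer data kind):
# for a dataframe with columns ["a", "b"],
# _parse_added_row(df, {"b": "5", "_index": "row-3"}, schema)
# returns ("row-3", [None, 5]); columns missing from the added row stay None.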


def _apply_row_additions(
    df: pd.DataFrame,
    added_rows: list[dict[str, Any]],
    dataframe_schema: DataframeSchema,
) -> None:
    """Apply row additions to the provided dataframe (inplace).

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to apply the row additions to.
    added_rows : list[dict[str, Any]]
        A list of row additions. Each row addition is a dictionary with the
        column name as key and the new cell value as value.
    dataframe_schema : DataframeSchema
        The schema of the dataframe.
    """
    if not added_rows:
        return

    import pandas as pd

    index_type: Literal["range", "integer", "other"] = "other"
    # These are only used if the dataframe has a range or integer index that can
    # be auto incremented:
    index_stop: int | None = None
    index_step: int | None = None

    if isinstance(df.index, pd.RangeIndex):
        # Extract metadata from the range index:
        index_type = "range"
        index_stop = cast("int", df.index.stop)
        index_step = cast("int", df.index.step)
    elif isinstance(df.index, pd.Index) and pd.api.types.is_integer_dtype(
        df.index.dtype
    ):
        # Get the highest integer value and increment it by 1 to get a unique
        # index value.
        index_type = "integer"
        index_stop = 0 if df.index.empty else df.index.max() + 1
        index_step = 1

    for added_row in added_rows:
        index_value, new_row = _parse_added_row(df, added_row, dataframe_schema)

        if index_value is not None and index_type != "range":
            # Case 1: Non-range index with an explicitly provided index value.
            # Add the row using the user-provided index value.
            # This handles any type of index that cannot be auto incremented.
            # Note: this just overwrites the row in case the index value
            # already exists. In the future, it would be better to
            # require users to provide unique non-None values for the index with
            # some kind of visual indications.
            df.loc[index_value, :] = new_row
            continue

        if index_stop is not None and index_step is not None:
            # Case 2: Range or integer index that can be auto incremented.
            # Add the row using the next value in the sequence.
            df.loc[index_stop, :] = new_row
            # Increment to the next range index value:
            index_stop += index_step
            continue

        # Row cannot be added -> skip it and log a warning.
        _LOGGER.warning(
            "Cannot automatically add a row for an index "
            "of type %s without an explicit index value. Row addition skipped.",
            type(df.index).__name__,
        )
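
# Sketch (hypothetical): with a RangeIndex(start=0, stop=3, step=1), the first
# added row lands at index 3, the next at 4, and so on. For an index that is
# neither a range nor an integer index, a row is only added if the user supplied
# an explicit "_index" value; otherwise it is skipped with a warning.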


def _apply_row_deletions(df: pd.DataFrame, deleted_rows: list[int]) -> None:
    """Apply row deletions to the provided dataframe (inplace).

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to apply the row deletions to.
    deleted_rows : list[int]
        A list of row numbers to delete.
    """
    # Drop rows based on their numeric row positions:
    df.drop(df.index[deleted_rows], inplace=True)  # noqa: PD002
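
# Sketch (hypothetical): deleted_rows=[0, 2] drops the first and third rows by
# their numeric positions, regardless of the index labels.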


def _apply_dataframe_edits(
    df: pd.DataFrame,
    data_editor_state: EditingState,
    dataframe_schema: DataframeSchema,
) -> None:
    """Apply edits to the provided dataframe (inplace).

    This includes cell edits, row additions, and row deletions.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to apply the edits to.
    data_editor_state : EditingState
        The editing state of the data editor component.
    dataframe_schema : DataframeSchema
        The schema of the dataframe.
    """
    if data_editor_state.get("edited_rows"):
        _apply_cell_edits(df, data_editor_state["edited_rows"], dataframe_schema)

    if data_editor_state.get("deleted_rows"):
        _apply_row_deletions(df, data_editor_state["deleted_rows"])

    if data_editor_state.get("added_rows"):
        # The addition of new rows needs to happen after the deletion to not have
        # unexpected side effects, like
        # https://github.com/streamlit/streamlit/issues/8854
        _apply_row_additions(df, data_editor_state["added_rows"], dataframe_schema)
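
# Sketch (hypothetical values): applying
#   {"edited_rows": {0: {"a": "1"}}, "deleted_rows": [1], "added_rows": [{"a": "2"}]}
# first overwrites cell (0, "a"), then drops the row at position 1, and only then
# appends the new row, so deletion positions always refer to the original rows.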


def _is_supported_index(df_index: pd.Index) -> bool:
    """Check if the index is supported by the data editor component.

    Parameters
    ----------
    df_index : pd.Index
        The index to check.

    Returns
    -------
    bool
        True if the index is supported, False otherwise.
    """
    import pandas as pd

    return (
        type(df_index)
        in [
            pd.RangeIndex,
            pd.Index,
            pd.DatetimeIndex,
            pd.CategoricalIndex,
            # Interval type isn't editable currently:
            # pd.IntervalIndex,
            # Period type isn't editable currently:
            # pd.PeriodIndex,
        ]
        # We need to check these index types without importing, since they are
        # deprecated and planned to be removed soon.
        or is_type(df_index, "pandas.core.indexes.numeric.Int64Index")
        or is_type(df_index, "pandas.core.indexes.numeric.Float64Index")
        or is_type(df_index, "pandas.core.indexes.numeric.UInt64Index")
    )
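
# Sketch: a pd.RangeIndex or a plain pd.Index passes this check, while e.g. a
# pd.MultiIndex or pd.IntervalIndex does not (the type check is exact, so Index
# subclasses are not accepted implicitly).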


def _fix_column_headers(data_df: pd.DataFrame) -> None:
    """Fix the column headers of the provided dataframe inplace to work
    correctly for data editing."""
    import pandas as pd

    if isinstance(data_df.columns, pd.MultiIndex):
        # Flatten hierarchical column headers to a single level:
        data_df.columns = [
            "_".join(map(str, header)) for header in data_df.columns.to_flat_index()
        ]
    elif pd.api.types.infer_dtype(data_df.columns) != "string":
        # If the column names are not all strings, we need to convert them to
        # strings to avoid issues with editing:
        data_df.rename(
            columns={column: str(column) for column in data_df.columns},
            inplace=True,  # noqa: PD002
        )
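
# Sketch (hypothetical): a MultiIndex header ("a", "b") is flattened to the
# column name "a_b", and an integer column label 0 becomes the string "0".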


def _check_column_names(data_df: pd.DataFrame) -> None:
    """Check if the column names in the provided dataframe are valid.

    It's not allowed to have duplicate column names or column names that are
    named ``_index``. If the column names are not valid, a ``StreamlitAPIException``
    is raised.
    """
    if data_df.columns.empty:
        return

    # Check if the column names are unique and raise an exception if not.
    # Add the names of the duplicated columns to the exception message:
    duplicated_columns = data_df.columns[data_df.columns.duplicated()]
    if len(duplicated_columns) > 0:
        raise StreamlitAPIException(
            f"All column names are required to be unique for usage with data editor. "
            f"The following column names are duplicated: {list(duplicated_columns)}. "
            f"Please rename the duplicated columns in the provided data."
        )

    # Check that no column is named "_index" and raise an exception if one is:
    if INDEX_IDENTIFIER in data_df.columns:
        raise StreamlitAPIException(
            f"The column name '{INDEX_IDENTIFIER}' is reserved for the index column "
            f"and can't be used for data columns. Please rename the column in the "
            f"provided data."
        )
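
# Sketch (hypothetical): a dataframe with columns ["a", "a"] or with a column
# literally named "_index" raises a StreamlitAPIException here.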


def _check_type_compatibilities(
    data_df: pd.DataFrame,
    columns_config: ColumnConfigMapping,
    dataframe_schema: DataframeSchema,
) -> None:
    """Check column type to data type compatibility.

    Iterates the index and all columns of the dataframe to check if
    the configured column types are compatible with the underlying data types.

    Parameters
    ----------
    data_df : pd.DataFrame
        The dataframe to check the type compatibilities for.
    columns_config : ColumnConfigMapping
        A mapping of column to column configurations.
    dataframe_schema : DataframeSchema
        The schema of the dataframe.

    Raises
    ------
    StreamlitAPIException
        If a configured column type is editable and not compatible with the
        underlying data type.
    """
    # TODO(lukasmasuch): Update this here to support multi-index in the future:
    indices = [(INDEX_IDENTIFIER, data_df.index)]

    for column in indices + list(data_df.items()):
        column_name, _ = column
        column_data_kind = dataframe_schema[column_name]

        # TODO(lukasmasuch): support column config via numerical index here?
        if column_name in columns_config:
            column_config = columns_config[column_name]
            if column_config.get("disabled") is True:
                # Disabled columns are not checked for compatibility.
                # This might change in the future.
                continue

            type_config = column_config.get("type_config")
            if type_config is None:
                continue

            configured_column_type = type_config.get("type")
            if configured_column_type is None:
                continue

            if is_type_compatible(configured_column_type, column_data_kind) is False:
                raise StreamlitAPIException(
                    f"The configured column type `{configured_column_type}` for column "
                    f"`{column_name}` is not compatible for editing the underlying "
                    f"data type `{column_data_kind}`.\n\nYou have the following "
                    "options to fix this: 1) choose a compatible type, 2) disable "
                    "the column, or 3) convert the column into a compatible data type."
                )
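
# Sketch (hypothetical): configuring st.column_config.NumberColumn() on a column
# whose underlying data kind is STRING would typically fail this check, unless
# the column is disabled or the data is converted to a numeric dtype first.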


class DataEditorMixin:
    @overload
    def data_editor(
        self,
        data: EditableData,
        *,
        width: int | None = None,
        height: int | None = None,
        use_container_width: bool | None = None,
        hide_index: bool | None = None,
        column_order: Iterable[str] | None = None,
        column_config: ColumnConfigMappingInput | None = None,
        num_rows: Literal["fixed", "dynamic"] = "fixed",
        disabled: bool | Iterable[str] = False,
        key: Key | None = None,
        on_change: WidgetCallback | None = None,
        args: WidgetArgs | None = None,
        kwargs: WidgetKwargs | None = None,
        row_height: int | None = None,
    ) -> EditableData:
        pass

    @overload
    def data_editor(
        self,
        data: Any,
        *,
        width: int | None = None,
        height: int | None = None,
        use_container_width: bool | None = None,
        hide_index: bool | None = None,
        column_order: Iterable[str] | None = None,
        column_config: ColumnConfigMappingInput | None = None,
        num_rows: Literal["fixed", "dynamic"] = "fixed",
        disabled: bool | Iterable[str] = False,
        key: Key | None = None,
        on_change: WidgetCallback | None = None,
        args: WidgetArgs | None = None,
        kwargs: WidgetKwargs | None = None,
        row_height: int | None = None,
    ) -> pd.DataFrame:
        pass

    @gather_metrics("data_editor")
    def data_editor(
        self,
        data: DataTypes,
        *,
        width: int | None = None,
        height: int | None = None,
        use_container_width: bool | None = None,
        hide_index: bool | None = None,
        column_order: Iterable[str] | None = None,
        column_config: ColumnConfigMappingInput | None = None,
        num_rows: Literal["fixed", "dynamic"] = "fixed",
        disabled: bool | Iterable[str] = False,
        key: Key | None = None,
        on_change: WidgetCallback | None = None,
        args: WidgetArgs | None = None,
        kwargs: WidgetKwargs | None = None,
        row_height: int | None = None,
    ) -> DataTypes:
"""Display a data editor widget.
The data editor widget allows you to edit dataframes and many other data structures in a table-like UI.
Parameters
----------
data : Anything supported by st.dataframe
The data to edit in the data editor.
.. note::
- Styles from ``pandas.Styler`` will only be applied to non-editable columns.
- Text and number formatting from ``column_config`` always takes
precedence over text and number formatting from ``pandas.Styler``.
- Mixing data types within a column can make the column uneditable.
- Additionally, the following data types are not yet supported for editing:
``complex``, ``list``, ``tuple``, ``bytes``, ``bytearray``,
``memoryview``, ``dict``, ``set``, ``frozenset``,
``fractions.Fraction``, ``pandas.Interval``, and
``pandas.Period``.
- To prevent overflow in JavaScript, columns containing
``datetime.timedelta`` and ``pandas.Timedelta`` values will
default to uneditable, but this can be changed through column
configuration.
width : int or None
Desired width of the data editor expressed in pixels. If ``width``
is ``None`` (default), Streamlit sets the data editor width to fit
its contents up to the width of the parent container. If ``width``
is greater than the width of the parent container, Streamlit sets
the data editor width to match the width of the parent container.
height : int or None
Desired height of the data editor expressed in pixels. If ``height``
is ``None`` (default), Streamlit sets the height to show at most
ten rows. Vertical scrolling within the data editor element is
enabled when the height does not accommodate all rows.
use_container_width : bool
Whether to override ``width`` with the width of the parent
container. If this is ``True`` (default), Streamlit sets the width
of the data editor to match the width of the parent container. If
this is ``False``, Streamlit sets the data editor's width according
to ``width``.
hide_index : bool or None
Whether to hide the index column(s). If ``hide_index`` is ``None``
(default), the visibility of index columns is automatically
determined based on the data.
column_order : Iterable of str or None
Specifies the display order of columns. This also affects which columns are
visible. For example, ``column_order=("col2", "col1")`` will display 'col2'
first, followed by 'col1', and will hide all other non-index columns. If
None (default), the order is inherited from the original data structure.
column_config : dict or None
Configures how columns are displayed, e.g. their title, visibility, type, or
format, as well as editing properties such as min/max value or step.
This needs to be a dictionary where each key is a column name and the value
is one of:
- ``None`` to hide the column.
- A string to set the display label of the column.
- One of the column types defined under ``st.column_config``, e.g.
``st.column_config.NumberColumn("Dollar values", format="$ %d")`` to show
a column as dollar amounts. See more info on the available column types
and config options `here <https://docs.streamlit.io/develop/api-reference/data/st.column_config>`_.
To configure the index column(s), use ``_index`` as the column name.
num_rows : "fixed" or "dynamic"
Specifies if the user can add and delete rows in the data editor.
If "fixed", the user cannot add or delete rows. If "dynamic", the user can
add and delete rows in the data editor, but column sorting is disabled.
Defaults to "fixed".
disabled : bool or Iterable of str
Controls the editing of columns. If True, editing is disabled for all columns.
If an Iterable of column names is provided (e.g., ``disabled=("col1", "col2"))``,
only the specified columns will be disabled for editing. If False (default),
all columns that support editing are editable.
key : str
An optional string to use as the unique key for this widget. If this
is omitted, a key will be generated for the widget based on its
content. No two widgets may have the same key.
on_change : callable
An optional callback invoked when this data_editor's value changes.
args : tuple
An optional tuple of args to pass to the callback.
kwargs : dict
An optional dict of kwargs to pass to the callback.
row_height : int or None
The height of each row in the data editor in pixels. If ``row_height``
is ``None`` (default), Streamlit will use a default row height,
which fits one line of text.
Returns
-------
pandas.DataFrame, pandas.Series, pyarrow.Table, numpy.ndarray, list, set, tuple, or dict.
The edited data. The edited data is returned in its original data type if
it corresponds to any of the supported return types. All other data types
are returned as a ``pandas.DataFrame``.
Examples
--------
>>> import streamlit as st
>>> import pandas as pd
>>>
>>> df = pd.DataFrame(
>>> [
>>> {"command": "st.selectbox", "rating": 4, "is_widget": True},
>>> {"command": "st.balloons", "rating": 5, "is_widget": False},
>>> {"command": "st.time_input", "rating": 3, "is_widget": True},
>>> ]
>>> )
>>> edited_df = st.data_editor(df)
>>>
>>> favorite_command = edited_df.loc[edited_df["rating"].idxmax()]["command"]
>>> st.markdown(f"Your favorite command is **{favorite_command}** 🎈")
.. output::
https://doc-data-editor.streamlit.app/
height: 350px
You can also allow the user to add and delete rows by setting ``num_rows`` to "dynamic":
>>> import streamlit as st
>>> import pandas as pd
>>>
>>> df = pd.DataFrame(
>>> [
>>> {"command": "st.selectbox", "rating": 4, "is_widget": True},
>>> {"command": "st.balloons", "rating": 5, "is_widget": False},
>>> {"command": "st.time_input", "rating": 3, "is_widget": True},
>>> ]
>>> )
>>> edited_df = st.data_editor(df, num_rows="dynamic")
>>>
>>> favorite_command = edited_df.loc[edited_df["rating"].idxmax()]["command"]
>>> st.markdown(f"Your favorite command is **{favorite_command}** 🎈")
.. output::
https://doc-data-editor1.streamlit.app/
height: 450px
Or you can customize the data editor via ``column_config``, ``hide_index``,
``column_order``, or ``disabled``:
>>> import pandas as pd
>>> import streamlit as st
>>>
>>> df = pd.DataFrame(
>>> [
>>> {"command": "st.selectbox", "rating": 4, "is_widget": True},
>>> {"command": "st.balloons", "rating": 5, "is_widget": False},
>>> {"command": "st.time_input", "rating": 3, "is_widget": True},
>>> ]
>>> )
>>> edited_df = st.data_editor(
>>> df,
>>> column_config={
>>> "command": "Streamlit Command",
>>> "rating": st.column_config.NumberColumn(
>>> "Your rating",
>>> help="How much do you like this command (1-5)?",
>>> min_value=1,
>>> max_value=5,
>>> step=1,
>>> format="%d",
>>> ),
>>> "is_widget": "Widget ?",
>>> },
>>> disabled=["command", "is_widget"],
>>> hide_index=True,
>>> )
>>>
>>> favorite_command = edited_df.loc[edited_df["rating"].idxmax()]["command"]
>>> st.markdown(f"Your favorite command is **{favorite_command}** 🎈")
.. output::
https://doc-data-editor-config.streamlit.app/
height: 350px
"""
        # Lazy-loaded imports:
        import pandas as pd
        import pyarrow as pa

        key = to_key(key)

        check_widget_policies(
            self.dg,
            key,
            on_change,
            default_value=None,
            writes_allowed=False,
        )

        if column_order is not None:
            column_order = list(column_order)

        column_config_mapping: ColumnConfigMapping = {}

        data_format = dataframe_util.determine_data_format(data)
        if data_format == dataframe_util.DataFormat.UNKNOWN:
            raise StreamlitAPIException(
                f"The data type ({type(data).__name__}) or format is not supported by "
                "the data editor. Please convert your data into a Pandas Dataframe or "
                "another supported data format."
            )

        # The dataframe should always be a copy of the original data
        # since we will apply edits directly to it.
        data_df = dataframe_util.convert_anything_to_pandas_df(data, ensure_copy=True)

        # Check if the index is supported:
        if not _is_supported_index(data_df.index):
            raise StreamlitAPIException(
                f"The type of the dataframe index - {type(data_df.index).__name__} - "
                "is not yet supported by the data editor."
            )

        # Check if the column names are valid and unique:
        _check_column_names(data_df)

        # Convert the user-provided column config into the frontend-compatible
        # format:
        column_config_mapping = process_config_mapping(column_config)

        # Deactivate editing for columns that are not compatible with arrow:
        for column_name, column_data in data_df.items():
            if dataframe_util.is_colum_type_arrow_incompatible(column_data):
                update_column_config(
                    column_config_mapping, column_name, {"disabled": True}
                )
                # Convert the incompatible type to string:
                data_df[column_name] = column_data.astype("string")

        apply_data_specific_configs(column_config_mapping, data_format)

        # Fix the column headers to work correctly for data editing:
        _fix_column_headers(data_df)

        has_range_index = isinstance(data_df.index, pd.RangeIndex)

        if not has_range_index:
            # If the index is not a range index, we configure it as required
            # since the user is required to provide a (unique) value for editing.
            update_column_config(
                column_config_mapping, INDEX_IDENTIFIER, {"required": True}
            )

        if hide_index is None and has_range_index and num_rows == "dynamic":
            # Temporary workaround:
            # We hide range indices if num_rows is dynamic,
            # since the current way of handling this index during editing is a
            # bit confusing. The user can still decide to show the index by
            # setting hide_index explicitly to False.
            hide_index = True

        if hide_index is not None:
            update_column_config(
                column_config_mapping, INDEX_IDENTIFIER, {"hidden": hide_index}
            )

        # If disabled is not a boolean, we assume it is a list of columns to
        # disable. This gets translated into the column configuration:
        if not isinstance(disabled, bool):
            for column in disabled:
                update_column_config(column_config_mapping, column, {"disabled": True})

        # Convert the dataframe to an arrow table which is used as the main
        # serialization format for sending the data to the frontend.
        # We also utilize the arrow schema to determine the data kinds of every
        # column.
        arrow_table = pa.Table.from_pandas(data_df)

        # Determine the dataframe schema which is required for parsing edited
        # values and for checking type compatibilities.
        dataframe_schema = determine_dataframe_schema(data_df, arrow_table.schema)

        # Check if all configured column types are compatible with the underlying
        # data. Throws an exception if any of the configured types are
        # incompatible.
        _check_type_compatibilities(data_df, column_config_mapping, dataframe_schema)

        arrow_bytes = dataframe_util.convert_arrow_table_to_arrow_bytes(arrow_table)

        # We want to do this as early as possible to avoid introducing
        # nondeterminism, but it isn't clear how much processing is needed to have
        # the data in a format that will hash consistently, so we do it late here
        # to have it as close as possible to how it used to be.
        ctx = get_script_run_ctx()
        element_id = compute_and_register_element_id(
            "data_editor",
            user_key=key,
            form_id=current_form_id(self.dg),
            dg=self.dg,
            data=arrow_bytes,
            width=width,
            height=height,
            use_container_width=use_container_width,
            column_order=column_order,
            column_config_mapping=str(column_config_mapping),
            num_rows=num_rows,
            row_height=row_height,
        )

        proto = ArrowProto()
        proto.id = element_id

        if use_container_width is None:
            # If use_container_width was not explicitly set by the user, we set
            # it to True if width was not set explicitly, and False otherwise.
            use_container_width = width is None
        proto.use_container_width = use_container_width

        if width:
            proto.width = width

        if height:
            proto.height = height

        if row_height:
            proto.row_height = row_height

        if column_order:
            proto.column_order[:] = column_order

        # Only set disabled to true if it is actually true.
        # It can also be a list of columns, which should result in false here.
        proto.disabled = disabled is True

        proto.editing_mode = (
            ArrowProto.EditingMode.DYNAMIC
            if num_rows == "dynamic"
            else ArrowProto.EditingMode.FIXED
        )

        proto.form_id = current_form_id(self.dg)

        if dataframe_util.is_pandas_styler(data):
            # Pandas styler will only work for non-editable/disabled columns.
            # Get the first 10 chars of the md5 hash of the key or delta path
            # and use it as the styler uuid.
            # We are only using the first 10 chars to keep the uuid short since
            # it will be used for all the cells in the dataframe. Therefore, this
            # might have a significant impact on the message size. 10 chars
            # should be good enough to avoid potential collisions in this case.
            # Even on collisions, there should not be a big issue with the
            # rendering in the data editor.
            styler_uuid = calc_md5(key or self.dg._get_delta_path_str())[:10]
            data.set_uuid(styler_uuid)
            marshall_styler(proto, data, styler_uuid)

        proto.data = arrow_bytes

        marshall_column_config(proto, column_config_mapping)

        serde = DataEditorSerde()

        widget_state = register_widget(
            proto.id,
            on_change_handler=on_change,
            args=args,
            kwargs=kwargs,
            deserializer=serde.deserialize,
            serializer=serde.serialize,
            ctx=ctx,
            value_type="string_value",
        )

        _apply_dataframe_edits(data_df, widget_state.value, dataframe_schema)

        self.dg._enqueue("arrow_data_frame", proto)

        return dataframe_util.convert_pandas_df_to_data_format(data_df, data_format)

    @property
    def dg(self) -> DeltaGenerator:
        """Get our DeltaGenerator."""
        return cast("DeltaGenerator", self)