1021 lines
37 KiB
Python
1021 lines
37 KiB
Python
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass
|
|
from decimal import Decimal
|
|
from typing import (
|
|
TYPE_CHECKING,
|
|
Any,
|
|
Final,
|
|
Literal,
|
|
TypedDict,
|
|
TypeVar,
|
|
Union,
|
|
cast,
|
|
overload,
|
|
)
|
|
|
|
from typing_extensions import TypeAlias
|
|
|
|
from streamlit import dataframe_util
|
|
from streamlit import logger as _logger
|
|
from streamlit.elements.lib.column_config_utils import (
|
|
INDEX_IDENTIFIER,
|
|
ColumnConfigMapping,
|
|
ColumnConfigMappingInput,
|
|
ColumnDataKind,
|
|
DataframeSchema,
|
|
apply_data_specific_configs,
|
|
determine_dataframe_schema,
|
|
is_type_compatible,
|
|
marshall_column_config,
|
|
process_config_mapping,
|
|
update_column_config,
|
|
)
|
|
from streamlit.elements.lib.form_utils import current_form_id
|
|
from streamlit.elements.lib.pandas_styler_utils import marshall_styler
|
|
from streamlit.elements.lib.policies import check_widget_policies
|
|
from streamlit.elements.lib.utils import Key, compute_and_register_element_id, to_key
|
|
from streamlit.errors import StreamlitAPIException
|
|
from streamlit.proto.Arrow_pb2 import Arrow as ArrowProto
|
|
from streamlit.runtime.metrics_util import gather_metrics
|
|
from streamlit.runtime.scriptrunner_utils.script_run_context import get_script_run_ctx
|
|
from streamlit.runtime.state import (
|
|
WidgetArgs,
|
|
WidgetCallback,
|
|
WidgetKwargs,
|
|
register_widget,
|
|
)
|
|
from streamlit.type_util import is_type
|
|
from streamlit.util import calc_md5
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Iterable, Mapping
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import pyarrow as pa
|
|
from pandas.io.formats.style import Styler
|
|
|
|
from streamlit.delta_generator import DeltaGenerator
|
|
|
|
# Module-level logger for this element.
_LOGGER: Final = _logger.get_logger(__name__)

# All formats that support direct editing, meaning that these
# formats will be returned with the same type when used with data_editor.
EditableData = TypeVar(
    "EditableData",
    bound=Union[
        dataframe_util.DataFrameGenericAlias[Any],  # covers DataFrame and Series
        tuple[Any],
        list[Any],
        set[Any],
        dict[str, Any],
        # TODO(lukasmasuch): Add support for np.ndarray
        # but it is not possible with np.ndarray.
        # NDArray[Any] works, but is only available in numpy>1.20.
        # TODO(lukasmasuch): Add support for pa.Table typing
        # pa.Table does not work since it is a C-based class resulting in Any
    ],
)


# All data types supported by the data editor.
# Quoted names are forward references resolved only under TYPE_CHECKING.
DataTypes: TypeAlias = Union[
    "pd.DataFrame",
    "pd.Series",
    "pd.Index",
    "Styler",
    "pa.Table",
    "np.ndarray[Any, np.dtype[np.float64]]",
    tuple[Any],
    list[Any],
    set[Any],
    dict[str, Any],
]
|
|
|
|
|
|
class EditingState(TypedDict, total=False):
    """
    A dictionary representing the current state of the data editor.

    Attributes
    ----------
    edited_rows : Dict[int, Dict[str, str | int | float | bool | None]]
        An hierarchical mapping of edited cells based on:
        row position -> column name -> value.

    added_rows : List[Dict[str, str | int | float | bool | None]]
        A list of added rows, where each row is a mapping from column name to
        the cell value.

    deleted_rows : List[int]
        A list of deleted rows, where each row is the numerical position of
        the deleted row.
    """

    # Cell edits keyed by numerical row position -> column name -> new value.
    edited_rows: dict[int, dict[str, str | int | float | bool | None]]
    # Newly added rows; each entry maps column name -> cell value.
    added_rows: list[dict[str, str | int | float | bool | None]]
    # Numerical positions of the rows that were deleted.
    deleted_rows: list[int]
|
|
|
|
|
|
@dataclass
class DataEditorSerde:
    """DataEditorSerde is used to serialize and deserialize the data editor state."""

    def deserialize(self, ui_value: str | None) -> EditingState:
        """Deserialize the JSON editing state received from the frontend.

        Parameters
        ----------
        ui_value : str | None
            The JSON-encoded editing state, or None if no edits exist yet.

        Returns
        -------
        EditingState
            The editing state with all three keys guaranteed to be present and
            the ``edited_rows`` keys converted back to integers.
        """
        data_editor_state: EditingState = (
            {
                "edited_rows": {},
                "added_rows": [],
                "deleted_rows": [],
            }
            if ui_value is None
            else json.loads(ui_value)
        )

        # Make sure that all editing state keys are present
        # (idiomatic setdefault instead of three membership checks):
        data_editor_state.setdefault("edited_rows", {})
        data_editor_state.setdefault("added_rows", [])
        data_editor_state.setdefault("deleted_rows", [])

        # Convert the keys (numerical row positions) to integers.
        # The keys are strings because they are serialized to JSON.
        data_editor_state["edited_rows"] = {
            int(k): v for k, v in data_editor_state["edited_rows"].items()
        }
        return data_editor_state

    def serialize(self, editing_state: EditingState) -> str:
        """Serialize the editing state to JSON.

        Values that are not natively JSON-serializable (e.g. Timestamps or
        Decimals) fall back to their ``str()`` representation.
        """
        return json.dumps(editing_state, default=str)
|
|
|
|
|
|
def _parse_value(
    value: str | int | float | bool | None,
    column_data_kind: ColumnDataKind,
) -> Any:
    """Convert a raw cell value to the type matching the column's data kind.

    Parameters
    ----------
    value : str | int | float | bool | None
        The value to convert.

    column_data_kind : ColumnDataKind
        The determined data kind of the column. The column data kind refers to the
        shared data type of the values in the column (e.g. int, float, str).

    Returns
    -------
    The converted value, or ``None`` if the input is ``None`` or parsing fails.
    """
    if value is None:
        return None

    import pandas as pd

    # Scalar kinds map directly onto a converter callable.
    # Decimal theoretically can also be initialized via number values.
    # However, using number values here seems to cause issues with Arrow
    # serialization, once you try to render the returned dataframe.
    scalar_converters = {
        ColumnDataKind.STRING: str,
        ColumnDataKind.INTEGER: int,
        ColumnDataKind.FLOAT: float,
        ColumnDataKind.BOOLEAN: bool,
        ColumnDataKind.DECIMAL: lambda v: Decimal(str(v)),
        ColumnDataKind.TIMEDELTA: pd.Timedelta,
    }

    try:
        converter = scalar_converters.get(column_data_kind)
        if converter is not None:
            return converter(value)

        if column_data_kind in (
            ColumnDataKind.DATETIME,
            ColumnDataKind.DATE,
            ColumnDataKind.TIME,
        ):
            # All temporal kinds are parsed through a Timestamp first:
            timestamp = pd.Timestamp(value)

            if timestamp is pd.NaT:
                return None

            if column_data_kind == ColumnDataKind.DATE:
                return timestamp.date()

            if column_data_kind == ColumnDataKind.TIME:
                return timestamp.time()

            return timestamp

    except (ValueError, pd.errors.ParserError) as ex:
        _LOGGER.warning(
            "Failed to parse value %s as %s.",
            value,
            column_data_kind,
            exc_info=ex,
        )
        return None

    # Unknown kinds are passed through unchanged:
    return value
|
|
|
|
|
|
def _apply_cell_edits(
    df: pd.DataFrame,
    edited_rows: Mapping[int, Mapping[str, str | int | float | bool | None]],
    dataframe_schema: DataframeSchema,
) -> None:
    """Apply cell edits to the provided dataframe (inplace).

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to apply the cell edits to.

    edited_rows : Mapping[int, Mapping[str, str | int | float | bool | None]]
        A hierarchical mapping based on row position -> column name -> value

    dataframe_schema: DataframeSchema
        The schema of the dataframe.
    """
    for row_id, row_changes in edited_rows.items():
        row_pos = int(row_id)
        for col_name, new_value in row_changes.items():
            if col_name != INDEX_IDENTIFIER:
                # Regular data cell: resolve the column position and write
                # the parsed value directly into the dataframe.
                col_pos = df.columns.get_loc(col_name)
                df.iloc[row_pos, col_pos] = _parse_value(
                    new_value, dataframe_schema[col_name]
                )
            else:
                # The edited cell is part of the index.
                # TODO(lukasmasuch): To support multi-index in the future:
                # use a tuple of values here instead of a single value
                current_label = df.index[row_pos]
                new_label = _parse_value(
                    new_value, dataframe_schema[INDEX_IDENTIFIER]
                )
                df.rename(
                    index={current_label: new_label},
                    inplace=True,  # noqa: PD002
                )
|
|
|
|
|
|
def _parse_added_row(
    df: pd.DataFrame,
    added_row: dict[str, Any],
    dataframe_schema: DataframeSchema,
) -> tuple[Any, list[Any]]:
    """Parse the added row into an optional index value and a list of row values."""
    index_value = None
    # Start with an all-None row and fill in the cells that were provided:
    row_values: list[Any] = [None] * df.shape[1]

    for col_name, raw_value in added_row.items():
        if col_name == INDEX_IDENTIFIER:
            # TODO(lukasmasuch): To support multi-index in the future:
            # use a tuple of values here instead of a single value
            index_value = _parse_value(raw_value, dataframe_schema[INDEX_IDENTIFIER])
            continue
        position = cast("int", df.columns.get_loc(col_name))
        row_values[position] = _parse_value(raw_value, dataframe_schema[col_name])

    return index_value, row_values
|
|
|
|
|
|
def _apply_row_additions(
    df: pd.DataFrame,
    added_rows: list[dict[str, Any]],
    dataframe_schema: DataframeSchema,
) -> None:
    """Apply row additions to the provided dataframe (inplace).

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to apply the row additions to.

    added_rows : List[Dict[str, Any]]
        A list of row additions. Each row addition is a dictionary with the
        column position as key and the new cell value as value.

    dataframe_schema: DataframeSchema
        The schema of the dataframe.
    """

    if not added_rows:
        return

    import pandas as pd

    index_type: Literal["range", "integer", "other"] = "other"
    # This is only used if the dataframe has a range or integer index that can be
    # auto incremented:
    index_stop: int | None = None
    index_step: int | None = None

    if isinstance(df.index, pd.RangeIndex):
        # Extract metadata from the range index:
        index_type = "range"
        index_stop = cast("int", df.index.stop)
        index_step = cast("int", df.index.step)
    elif isinstance(df.index, pd.Index) and pd.api.types.is_integer_dtype(
        df.index.dtype
    ):
        # Get highest integer value and increment it by 1 to get unique index value.
        index_type = "integer"
        index_stop = 0 if df.index.empty else df.index.max() + 1
        index_step = 1

    for added_row in added_rows:
        index_value, new_row = _parse_added_row(df, added_row, dataframe_schema)

        if index_value is not None and index_type != "range":
            # Case 1: Non-range index with an explicitly provided index value
            # Add row using the user-provided index value.
            # This handles any type of index that cannot be auto incremented.

            # Note: this just overwrites the row in case the index value
            # already exists. In the future, it would be better to
            # require users to provide unique non-None values for the index with
            # some kind of visual indications.
            df.loc[index_value, :] = new_row
            continue

        if index_stop is not None and index_step is not None:
            # Case 2: Range or integer index that can be auto incremented.
            # Add row using the next value in the sequence
            df.loc[index_stop, :] = new_row
            # Increment to the next range index value
            # (index_stop is carried across loop iterations so each added
            # row gets a fresh, unique position):
            index_stop += index_step
            continue

        # Case 3: Row cannot be added -> skip it and log a warning.
        _LOGGER.warning(
            "Cannot automatically add row for the index "
            "of type %s without an explicit index value. Row addition skipped.",
            type(df.index).__name__,
        )
|
|
|
|
|
|
def _apply_row_deletions(df: pd.DataFrame, deleted_rows: list[int]) -> None:
|
|
"""Apply row deletions to the provided dataframe (inplace).
|
|
|
|
Parameters
|
|
----------
|
|
df : pd.DataFrame
|
|
The dataframe to apply the row deletions to.
|
|
|
|
deleted_rows : List[int]
|
|
A list of row numbers to delete.
|
|
"""
|
|
# Drop rows based in numeric row positions
|
|
df.drop(df.index[deleted_rows], inplace=True) # noqa: PD002
|
|
|
|
|
|
def _apply_dataframe_edits(
    df: pd.DataFrame,
    data_editor_state: EditingState,
    dataframe_schema: DataframeSchema,
) -> None:
    """Apply edits to the provided dataframe (inplace).

    This includes cell edits, row additions and row deletions.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to apply the edits to.

    data_editor_state : EditingState
        The editing state of the data editor component.

    dataframe_schema: DataframeSchema
        The schema of the dataframe.
    """
    edited_rows = data_editor_state.get("edited_rows")
    if edited_rows:
        _apply_cell_edits(df, edited_rows, dataframe_schema)

    deleted_rows = data_editor_state.get("deleted_rows")
    if deleted_rows:
        _apply_row_deletions(df, deleted_rows)

    added_rows = data_editor_state.get("added_rows")
    if added_rows:
        # The addition of new rows needs to happen after the deletion to not have
        # unexpected side-effects, like https://github.com/streamlit/streamlit/issues/8854
        _apply_row_additions(df, added_rows, dataframe_schema)
|
|
|
|
|
|
def _is_supported_index(df_index: pd.Index) -> bool:
|
|
"""Check if the index is supported by the data editor component.
|
|
|
|
Parameters
|
|
----------
|
|
df_index : pd.Index
|
|
The index to check.
|
|
|
|
Returns
|
|
-------
|
|
bool
|
|
True if the index is supported, False otherwise.
|
|
"""
|
|
import pandas as pd
|
|
|
|
return (
|
|
type(df_index)
|
|
in [
|
|
pd.RangeIndex,
|
|
pd.Index,
|
|
pd.DatetimeIndex,
|
|
pd.CategoricalIndex,
|
|
# Interval type isn't editable currently:
|
|
# pd.IntervalIndex,
|
|
# Period type isn't editable currently:
|
|
# pd.PeriodIndex,
|
|
]
|
|
# We need to check these index types without importing, since they are
|
|
# deprecated and planned to be removed soon.
|
|
or is_type(df_index, "pandas.core.indexes.numeric.Int64Index")
|
|
or is_type(df_index, "pandas.core.indexes.numeric.Float64Index")
|
|
or is_type(df_index, "pandas.core.indexes.numeric.UInt64Index")
|
|
)
|
|
|
|
|
|
def _fix_column_headers(data_df: pd.DataFrame) -> None:
|
|
"""Fix the column headers of the provided dataframe inplace to work
|
|
correctly for data editing.
|
|
"""
|
|
import pandas as pd
|
|
|
|
if isinstance(data_df.columns, pd.MultiIndex):
|
|
# Flatten hierarchical column headers to a single level:
|
|
data_df.columns = [
|
|
"_".join(map(str, header)) for header in data_df.columns.to_flat_index()
|
|
]
|
|
elif pd.api.types.infer_dtype(data_df.columns) != "string":
|
|
# If the column names are not all strings, we need to convert them to strings
|
|
# to avoid issues with editing:
|
|
data_df.rename(
|
|
columns={column: str(column) for column in data_df.columns},
|
|
inplace=True, # noqa: PD002
|
|
)
|
|
|
|
|
|
def _check_column_names(data_df: pd.DataFrame) -> None:
|
|
"""Check if the column names in the provided dataframe are valid.
|
|
|
|
It's not allowed to have duplicate column names or column names that are
|
|
named ``_index``. If the column names are not valid, a ``StreamlitAPIException``
|
|
is raised.
|
|
"""
|
|
|
|
if data_df.columns.empty:
|
|
return
|
|
|
|
# Check if the column names are unique and raise an exception if not.
|
|
# Add the names of the duplicated columns to the exception message.
|
|
duplicated_columns = data_df.columns[data_df.columns.duplicated()]
|
|
if len(duplicated_columns) > 0:
|
|
raise StreamlitAPIException(
|
|
f"All column names are required to be unique for usage with data editor. "
|
|
f"The following column names are duplicated: {list(duplicated_columns)}. "
|
|
f"Please rename the duplicated columns in the provided data."
|
|
)
|
|
|
|
# Check if the column names are not named "_index" and raise an exception if so.
|
|
if INDEX_IDENTIFIER in data_df.columns:
|
|
raise StreamlitAPIException(
|
|
f"The column name '{INDEX_IDENTIFIER}' is reserved for the index column "
|
|
f"and can't be used for data columns. Please rename the column in the "
|
|
f"provided data."
|
|
)
|
|
|
|
|
|
def _check_type_compatibilities(
    data_df: pd.DataFrame,
    columns_config: ColumnConfigMapping,
    dataframe_schema: DataframeSchema,
) -> None:
    """Check column type to data type compatibility.

    Iterates the index and all columns of the dataframe to check if
    the configured column types are compatible with the underlying data types.

    Parameters
    ----------
    data_df : pd.DataFrame
        The dataframe to check the type compatibilities for.

    columns_config : ColumnConfigMapping
        A mapping of column to column configurations.

    dataframe_schema : DataframeSchema
        The schema of the dataframe.

    Raises
    ------
    StreamlitAPIException
        If a configured column type is editable and not compatible with the
        underlying data type.
    """
    # TODO(lukasmasuch): Update this here to support multi-index in the future:
    indices = [(INDEX_IDENTIFIER, data_df.index)]

    for column_name, _ in indices + list(data_df.items()):
        # Look up the data kind for every column (index included):
        column_data_kind = dataframe_schema[column_name]

        # TODO(lukasmasuch): support column config via numerical index here?
        if column_name not in columns_config:
            continue

        column_config = columns_config[column_name]
        if column_config.get("disabled") is True:
            # Disabled columns are not checked for compatibility.
            # This might change in the future.
            continue

        type_config = column_config.get("type_config")
        if type_config is None:
            continue

        configured_column_type = type_config.get("type")
        if configured_column_type is None:
            continue

        if is_type_compatible(configured_column_type, column_data_kind) is False:
            raise StreamlitAPIException(
                f"The configured column type `{configured_column_type}` for column "
                f"`{column_name}` is not compatible for editing the underlying "
                f"data type `{column_data_kind}`.\n\nYou have following options to "
                f"fix this: 1) choose a compatible type 2) disable the column "
                f"3) convert the column into a compatible data type."
            )
|
|
|
|
|
|
class DataEditorMixin:
    # Overload 1: editable input types (DataFrame, Series, list, dict, ...)
    # are returned with the same type they were passed in as.
    @overload
    def data_editor(
        self,
        data: EditableData,
        *,
        width: int | None = None,
        height: int | None = None,
        use_container_width: bool | None = None,
        hide_index: bool | None = None,
        column_order: Iterable[str] | None = None,
        column_config: ColumnConfigMappingInput | None = None,
        num_rows: Literal["fixed", "dynamic"] = "fixed",
        disabled: bool | Iterable[str] = False,
        key: Key | None = None,
        on_change: WidgetCallback | None = None,
        args: WidgetArgs | None = None,
        kwargs: WidgetKwargs | None = None,
        row_height: int | None = None,
    ) -> EditableData:
        pass

    # Overload 2: all other input types are returned as a pandas DataFrame.
    @overload
    def data_editor(
        self,
        data: Any,
        *,
        width: int | None = None,
        height: int | None = None,
        use_container_width: bool | None = None,
        hide_index: bool | None = None,
        column_order: Iterable[str] | None = None,
        column_config: ColumnConfigMappingInput | None = None,
        num_rows: Literal["fixed", "dynamic"] = "fixed",
        disabled: bool | Iterable[str] = False,
        key: Key | None = None,
        on_change: WidgetCallback | None = None,
        args: WidgetArgs | None = None,
        kwargs: WidgetKwargs | None = None,
        row_height: int | None = None,
    ) -> pd.DataFrame:
        pass

    @gather_metrics("data_editor")
    def data_editor(
        self,
        data: DataTypes,
        *,
        width: int | None = None,
        height: int | None = None,
        use_container_width: bool | None = None,
        hide_index: bool | None = None,
        column_order: Iterable[str] | None = None,
        column_config: ColumnConfigMappingInput | None = None,
        num_rows: Literal["fixed", "dynamic"] = "fixed",
        disabled: bool | Iterable[str] = False,
        key: Key | None = None,
        on_change: WidgetCallback | None = None,
        args: WidgetArgs | None = None,
        kwargs: WidgetKwargs | None = None,
        row_height: int | None = None,
    ) -> DataTypes:
        """Display a data editor widget.

        The data editor widget allows you to edit dataframes and many other data structures in a table-like UI.

        Parameters
        ----------
        data : Anything supported by st.dataframe
            The data to edit in the data editor.

            .. note::
                - Styles from ``pandas.Styler`` will only be applied to non-editable columns.
                - Text and number formatting from ``column_config`` always takes
                  precedence over text and number formatting from ``pandas.Styler``.
                - Mixing data types within a column can make the column uneditable.
                - Additionally, the following data types are not yet supported for editing:
                  ``complex``, ``list``, ``tuple``, ``bytes``, ``bytearray``,
                  ``memoryview``, ``dict``, ``set``, ``frozenset``,
                  ``fractions.Fraction``, ``pandas.Interval``, and
                  ``pandas.Period``.
                - To prevent overflow in JavaScript, columns containing
                  ``datetime.timedelta`` and ``pandas.Timedelta`` values will
                  default to uneditable, but this can be changed through column
                  configuration.

        width : int or None
            Desired width of the data editor expressed in pixels. If ``width``
            is ``None`` (default), Streamlit sets the data editor width to fit
            its contents up to the width of the parent container. If ``width``
            is greater than the width of the parent container, Streamlit sets
            the data editor width to match the width of the parent container.

        height : int or None
            Desired height of the data editor expressed in pixels. If ``height``
            is ``None`` (default), Streamlit sets the height to show at most
            ten rows. Vertical scrolling within the data editor element is
            enabled when the height does not accommodate all rows.

        use_container_width : bool
            Whether to override ``width`` with the width of the parent
            container. If this is ``True`` (default), Streamlit sets the width
            of the data editor to match the width of the parent container. If
            this is ``False``, Streamlit sets the data editor's width according
            to ``width``.

        hide_index : bool or None
            Whether to hide the index column(s). If ``hide_index`` is ``None``
            (default), the visibility of index columns is automatically
            determined based on the data.

        column_order : Iterable of str or None
            Specifies the display order of columns. This also affects which columns are
            visible. For example, ``column_order=("col2", "col1")`` will display 'col2'
            first, followed by 'col1', and will hide all other non-index columns. If
            None (default), the order is inherited from the original data structure.

        column_config : dict or None
            Configures how columns are displayed, e.g. their title, visibility, type, or
            format, as well as editing properties such as min/max value or step.
            This needs to be a dictionary where each key is a column name and the value
            is one of:

            - ``None`` to hide the column.

            - A string to set the display label of the column.

            - One of the column types defined under ``st.column_config``, e.g.
              ``st.column_config.NumberColumn("Dollar values", format="$ %d")`` to show
              a column as dollar amounts. See more info on the available column types
              and config options `here <https://docs.streamlit.io/develop/api-reference/data/st.column_config>`_.

            To configure the index column(s), use ``_index`` as the column name.

        num_rows : "fixed" or "dynamic"
            Specifies if the user can add and delete rows in the data editor.
            If "fixed", the user cannot add or delete rows. If "dynamic", the user can
            add and delete rows in the data editor, but column sorting is disabled.
            Defaults to "fixed".

        disabled : bool or Iterable of str
            Controls the editing of columns. If True, editing is disabled for all columns.
            If an Iterable of column names is provided (e.g., ``disabled=("col1", "col2"))``,
            only the specified columns will be disabled for editing. If False (default),
            all columns that support editing are editable.

        key : str
            An optional string to use as the unique key for this widget. If this
            is omitted, a key will be generated for the widget based on its
            content. No two widgets may have the same key.

        on_change : callable
            An optional callback invoked when this data_editor's value changes.

        args : tuple
            An optional tuple of args to pass to the callback.

        kwargs : dict
            An optional dict of kwargs to pass to the callback.

        row_height : int or None
            The height of each row in the data editor in pixels. If ``row_height``
            is ``None`` (default), Streamlit will use a default row height,
            which fits one line of text.

        Returns
        -------
        pandas.DataFrame, pandas.Series, pyarrow.Table, numpy.ndarray, list, set, tuple, or dict.
            The edited data. The edited data is returned in its original data type if
            it corresponds to any of the supported return types. All other data types
            are returned as a ``pandas.DataFrame``.

        Examples
        --------
        >>> import streamlit as st
        >>> import pandas as pd
        >>>
        >>> df = pd.DataFrame(
        >>>     [
        >>>         {"command": "st.selectbox", "rating": 4, "is_widget": True},
        >>>         {"command": "st.balloons", "rating": 5, "is_widget": False},
        >>>         {"command": "st.time_input", "rating": 3, "is_widget": True},
        >>>     ]
        >>> )
        >>> edited_df = st.data_editor(df)
        >>>
        >>> favorite_command = edited_df.loc[edited_df["rating"].idxmax()]["command"]
        >>> st.markdown(f"Your favorite command is **{favorite_command}** 🎈")

        .. output::
           https://doc-data-editor.streamlit.app/
           height: 350px

        You can also allow the user to add and delete rows by setting ``num_rows`` to "dynamic":

        >>> import streamlit as st
        >>> import pandas as pd
        >>>
        >>> df = pd.DataFrame(
        >>>     [
        >>>         {"command": "st.selectbox", "rating": 4, "is_widget": True},
        >>>         {"command": "st.balloons", "rating": 5, "is_widget": False},
        >>>         {"command": "st.time_input", "rating": 3, "is_widget": True},
        >>>     ]
        >>> )
        >>> edited_df = st.data_editor(df, num_rows="dynamic")
        >>>
        >>> favorite_command = edited_df.loc[edited_df["rating"].idxmax()]["command"]
        >>> st.markdown(f"Your favorite command is **{favorite_command}** 🎈")

        .. output::
           https://doc-data-editor1.streamlit.app/
           height: 450px

        Or you can customize the data editor via ``column_config``, ``hide_index``,
        ``column_order``, or ``disabled``:

        >>> import pandas as pd
        >>> import streamlit as st
        >>>
        >>> df = pd.DataFrame(
        >>>     [
        >>>         {"command": "st.selectbox", "rating": 4, "is_widget": True},
        >>>         {"command": "st.balloons", "rating": 5, "is_widget": False},
        >>>         {"command": "st.time_input", "rating": 3, "is_widget": True},
        >>>     ]
        >>> )
        >>> edited_df = st.data_editor(
        >>>     df,
        >>>     column_config={
        >>>         "command": "Streamlit Command",
        >>>         "rating": st.column_config.NumberColumn(
        >>>             "Your rating",
        >>>             help="How much do you like this command (1-5)?",
        >>>             min_value=1,
        >>>             max_value=5,
        >>>             step=1,
        >>>             format="%d ⭐",
        >>>         ),
        >>>         "is_widget": "Widget ?",
        >>>     },
        >>>     disabled=["command", "is_widget"],
        >>>     hide_index=True,
        >>> )
        >>>
        >>> favorite_command = edited_df.loc[edited_df["rating"].idxmax()]["command"]
        >>> st.markdown(f"Your favorite command is **{favorite_command}** 🎈")


        .. output::
           https://doc-data-editor-config.streamlit.app/
           height: 350px

        """
        # Lazy-loaded import
        import pandas as pd
        import pyarrow as pa

        key = to_key(key)

        # Enforce widget usage rules (e.g. not allowed inside cached functions):
        check_widget_policies(
            self.dg,
            key,
            on_change,
            default_value=None,
            writes_allowed=False,
        )

        if column_order is not None:
            column_order = list(column_order)

        column_config_mapping: ColumnConfigMapping = {}

        data_format = dataframe_util.determine_data_format(data)
        if data_format == dataframe_util.DataFormat.UNKNOWN:
            raise StreamlitAPIException(
                f"The data type ({type(data).__name__}) or format is not supported by "
                "the data editor. Please convert your data into a Pandas Dataframe or "
                "another supported data format."
            )

        # The dataframe should always be a copy of the original data
        # since we will apply edits directly to it.
        data_df = dataframe_util.convert_anything_to_pandas_df(data, ensure_copy=True)

        # Check if the index is supported.
        if not _is_supported_index(data_df.index):
            raise StreamlitAPIException(
                f"The type of the dataframe index - {type(data_df.index).__name__} - is not "
                "yet supported by the data editor."
            )

        # Check if the column names are valid and unique.
        _check_column_names(data_df)

        # Convert the user provided column config into the frontend compatible format:
        column_config_mapping = process_config_mapping(column_config)

        # Deactivate editing for columns that are not compatible with arrow
        for column_name, column_data in data_df.items():
            if dataframe_util.is_colum_type_arrow_incompatible(column_data):
                update_column_config(
                    column_config_mapping, column_name, {"disabled": True}
                )
                # Convert incompatible type to string
                data_df[column_name] = column_data.astype("string")

        apply_data_specific_configs(column_config_mapping, data_format)

        # Fix the column headers to work correctly for data editing:
        _fix_column_headers(data_df)

        has_range_index = isinstance(data_df.index, pd.RangeIndex)

        if not has_range_index:
            # If the index is not a range index, we will configure it as required
            # since the user is required to provide a (unique) value for editing.
            update_column_config(
                column_config_mapping, INDEX_IDENTIFIER, {"required": True}
            )

        if hide_index is None and has_range_index and num_rows == "dynamic":
            # Temporary workaround:
            # We hide range indices if num_rows is dynamic.
            # since the current way of handling this index during editing is a
            # bit confusing. The user can still decide to show the index by
            # setting hide_index explicitly to False.
            hide_index = True

        if hide_index is not None:
            update_column_config(
                column_config_mapping, INDEX_IDENTIFIER, {"hidden": hide_index}
            )

        # If disabled not a boolean, we assume it is a list of columns to disable.
        # This gets translated into the columns configuration:
        if not isinstance(disabled, bool):
            for column in disabled:
                update_column_config(column_config_mapping, column, {"disabled": True})

        # Convert the dataframe to an arrow table which is used as the main
        # serialization format for sending the data to the frontend.
        # We also utilize the arrow schema to determine the data kinds of every column.
        arrow_table = pa.Table.from_pandas(data_df)

        # Determine the dataframe schema which is required for parsing edited values
        # and for checking type compatibilities.
        dataframe_schema = determine_dataframe_schema(data_df, arrow_table.schema)

        # Check if all configured column types are compatible with the underlying data.
        # Throws an exception if any of the configured types are incompatible.
        _check_type_compatibilities(data_df, column_config_mapping, dataframe_schema)

        arrow_bytes = dataframe_util.convert_arrow_table_to_arrow_bytes(arrow_table)

        # We want to do this as early as possible to avoid introducing nondeterminism,
        # but it isn't clear how much processing is needed to have the data in a
        # format that will hash consistently, so we do it late here to have it
        # as close as possible to how it used to be.
        ctx = get_script_run_ctx()
        element_id = compute_and_register_element_id(
            "data_editor",
            user_key=key,
            form_id=current_form_id(self.dg),
            dg=self.dg,
            data=arrow_bytes,
            width=width,
            height=height,
            use_container_width=use_container_width,
            column_order=column_order,
            column_config_mapping=str(column_config_mapping),
            num_rows=num_rows,
            row_height=row_height,
        )

        proto = ArrowProto()
        proto.id = element_id

        if use_container_width is None:
            # If use_container_width was not explicitly set by the user, we set
            # it to True if width was not set explicitly, and False otherwise.
            use_container_width = width is None

        proto.use_container_width = use_container_width

        if width:
            proto.width = width
        if height:
            proto.height = height

        if row_height:
            proto.row_height = row_height

        if column_order:
            proto.column_order[:] = column_order

        # Only set disabled to true if it is actually true
        # It can also be a list of columns, which should result in false here.
        proto.disabled = disabled is True

        proto.editing_mode = (
            ArrowProto.EditingMode.DYNAMIC
            if num_rows == "dynamic"
            else ArrowProto.EditingMode.FIXED
        )

        proto.form_id = current_form_id(self.dg)

        if dataframe_util.is_pandas_styler(data):
            # Pandas styler will only work for non-editable/disabled columns.
            # Get first 10 chars of md5 hash of the key or delta path as styler uuid
            # and set it as styler uuid.
            # We are only using the first 10 chars to keep the uuid short since
            # it will be used for all the cells in the dataframe. Therefore, this
            # might have a significant impact on the message size. 10 chars
            # should be good enough to avoid potential collisions in this case.
            # Even on collisions, there should not be a big issue with the
            # rendering in the data editor.
            styler_uuid = calc_md5(key or self.dg._get_delta_path_str())[:10]
            data.set_uuid(styler_uuid)
            marshall_styler(proto, data, styler_uuid)

        proto.data = arrow_bytes

        marshall_column_config(proto, column_config_mapping)

        serde = DataEditorSerde()

        widget_state = register_widget(
            proto.id,
            on_change_handler=on_change,
            args=args,
            kwargs=kwargs,
            deserializer=serde.deserialize,
            serializer=serde.serialize,
            ctx=ctx,
            value_type="string_value",
        )

        # Apply the user's edits (from the widget state) to the returned copy:
        _apply_dataframe_edits(data_df, widget_state.value, dataframe_schema)
        self.dg._enqueue("arrow_data_frame", proto)
        return dataframe_util.convert_pandas_df_to_data_format(data_df, data_format)

    @property
    def dg(self) -> DeltaGenerator:
        """Get our DeltaGenerator."""
        return cast("DeltaGenerator", self)
|