# Copyright 2018-2022 Streamlit Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from collections.abc import Iterable from typing import Any, Dict, List, Mapping, Optional, Union, cast, TYPE_CHECKING from typing import TypeVar from numpy import ndarray from pandas import DataFrame from pandas.io.formats.style import Styler import pyarrow as pa from streamlit import type_util from streamlit.proto.Arrow_pb2 import Arrow as ArrowProto if TYPE_CHECKING: from streamlit.delta_generator import DeltaGenerator Data = Union[DataFrame, Styler, pa.Table, ndarray, Iterable, Dict[str, List[Any]], None] class ArrowMixin: def _arrow_dataframe( self, data: Data = None, width: Optional[int] = None, height: Optional[int] = None, ) -> "DeltaGenerator": """Display a dataframe as an interactive table. Parameters ---------- data : pandas.DataFrame, pandas.Styler, pyarrow.Table, numpy.ndarray, Iterable, dict, or None The data to display. If 'data' is a pandas.Styler, it will be used to style its underlying DataFrame. width : int or None Desired width of the UI element expressed in pixels. If None, a default width based on the page width is used. height : int or None Desired height of the UI element expressed in pixels. If None, a default height is used. Examples -------- >>> df = pd.DataFrame( ... np.random.randn(50, 20), ... columns=('col %d' % i for i in range(20))) ... >>> st._arrow_dataframe(df) >>> st._arrow_dataframe(df, 200, 100) You can also pass a Pandas Styler object to change the style of the rendered DataFrame: >>> df = pd.DataFrame( ... np.random.randn(10, 20), ... columns=('col %d' % i for i in range(20))) ... >>> st._arrow_dataframe(df.style.highlight_max(axis=0)) """ # If pandas.Styler uuid is not provided, a hash of the position # of the element will be used. This will cause a rerender of the table # when the position of the element is changed. delta_path = self.dg._get_delta_path_str() default_uuid = str(hash(delta_path)) proto = ArrowProto() marshall(proto, data, default_uuid) return self.dg._enqueue( delta_type="arrow_data_frame", element_proto=proto, element_width=width, element_height=height, ) def _arrow_table(self, data: Data = None) -> "DeltaGenerator": """Display a static table. This differs from `st._arrow_dataframe` in that the table in this case is static: its entire contents are laid out directly on the page. Parameters ---------- data : pandas.DataFrame, pandas.Styler, pyarrow.Table, numpy.ndarray, Iterable, dict, or None The table data. Example ------- >>> df = pd.DataFrame( ... np.random.randn(10, 5), ... columns=("col %d" % i for i in range(5))) ... >>> st._arrow_table(df) """ # If pandas.Styler uuid is not provided, a hash of the position # of the element will be used. This will cause a rerender of the table # when the position of the element is changed. delta_path = self.dg._get_delta_path_str() default_uuid = str(hash(delta_path)) proto = ArrowProto() marshall(proto, data, default_uuid) return self.dg._enqueue("arrow_table", proto) @property def dg(self) -> "DeltaGenerator": """Get our DeltaGenerator.""" return cast("DeltaGenerator", self) def marshall(proto: ArrowProto, data: Data, default_uuid: Optional[str] = None) -> None: """Marshall pandas.DataFrame into an Arrow proto. Parameters ---------- proto : proto.Arrow Output. The protobuf for Streamlit Arrow proto. data : pandas.DataFrame, pandas.Styler, pyarrow.Table, numpy.ndarray, Iterable, dict, or None Something that is or can be converted to a dataframe. default_uuid : Optional[str] If pandas.Styler UUID is not provided, this value will be used. This attribute is optional and only used for pandas.Styler, other elements (e.g. charts) can ignore it. """ if type_util.is_pandas_styler(data): # default_uuid is a string only if the data is a `Styler`, # and `None` otherwise. assert isinstance( default_uuid, str ), "Default UUID must be a string for Styler data." _marshall_styler(proto, data, default_uuid) if isinstance(data, pa.Table): proto.data = type_util.pyarrow_table_to_bytes(data) else: df = type_util.convert_anything_to_df(data) proto.data = type_util.data_frame_to_bytes(df) def _marshall_styler(proto: ArrowProto, styler: Styler, default_uuid: str) -> None: """Marshall pandas.Styler into an Arrow proto. Parameters ---------- proto : proto.Arrow Output. The protobuf for Streamlit Arrow proto. styler : pandas.Styler Helps style a DataFrame or Series according to the data with HTML and CSS. default_uuid : str If pandas.Styler uuid is not provided, this value will be used. """ # pandas.Styler uuid should be set before _compute is called. _marshall_uuid(proto, styler, default_uuid) # We're using protected members of pandas.Styler to get styles, # which is not ideal and could break if the interface changes. styler._compute() # In Pandas 1.3.0, styler._translate() signature was changed. # 2 arguments were added: sparse_index and sparse_columns. # The functionality that they provide is not yet supported. if type_util.is_pandas_version_less_than("1.3.0"): pandas_styles = styler._translate() else: pandas_styles = styler._translate(False, False) _marshall_caption(proto, styler) _marshall_styles(proto, styler, pandas_styles) _marshall_display_values(proto, styler.data, pandas_styles) def _marshall_uuid(proto: ArrowProto, styler: Styler, default_uuid: str) -> None: """Marshall pandas.Styler uuid into an Arrow proto. Parameters ---------- proto : proto.Arrow Output. The protobuf for Streamlit Arrow proto. styler : pandas.Styler Helps style a DataFrame or Series according to the data with HTML and CSS. default_uuid : str If pandas.Styler uuid is not provided, this value will be used. """ if styler.uuid is None: styler.set_uuid(default_uuid) proto.styler.uuid = str(styler.uuid) def _marshall_caption(proto: ArrowProto, styler: Styler) -> None: """Marshall pandas.Styler caption into an Arrow proto. Parameters ---------- proto : proto.Arrow Output. The protobuf for Streamlit Arrow proto. styler : pandas.Styler Helps style a DataFrame or Series according to the data with HTML and CSS. """ if styler.caption is not None: proto.styler.caption = styler.caption def _marshall_styles( proto: ArrowProto, styler: Styler, styles: Mapping[str, Any] ) -> None: """Marshall pandas.Styler styles into an Arrow proto. Parameters ---------- proto : proto.Arrow Output. The protobuf for Streamlit Arrow proto. styler : pandas.Styler Helps style a DataFrame or Series according to the data with HTML and CSS. styles : dict pandas.Styler translated styles. """ css_rules = [] if "table_styles" in styles: table_styles = styles["table_styles"] table_styles = _trim_pandas_styles(table_styles) for style in table_styles: # styles in "table_styles" have a space # between the uuid and selector. rule = _pandas_style_to_css( "table_styles", style, styler.uuid, separator=" " ) css_rules.append(rule) if "cellstyle" in styles: cellstyle = styles["cellstyle"] cellstyle = _trim_pandas_styles(cellstyle) for style in cellstyle: rule = _pandas_style_to_css("cell_style", style, styler.uuid) css_rules.append(rule) if len(css_rules) > 0: proto.styler.styles = "\n".join(css_rules) M = TypeVar("M", bound=Mapping[str, Any]) def _trim_pandas_styles(styles: List[M]) -> List[M]: """Filter out empty styles. Every cell will have a class, but the list of props may just be [['', '']]. Parameters ---------- styles : list pandas.Styler translated styles. """ return [x for x in styles if any(any(y) for y in x["props"])] def _pandas_style_to_css( style_type: str, style: Mapping[str, Any], uuid: str, separator: str = "", ) -> str: """Convert pandas.Styler translated style to CSS. Parameters ---------- style_type : str Either "table_styles" or "cell_style". style : dict pandas.Styler translated style. uuid : str pandas.Styler uuid. separator : str A string separator used between table and cell selectors. """ declarations = [] for css_property, css_value in style["props"]: declaration = css_property.strip() + ": " + css_value.strip() declarations.append(declaration) table_selector = f"#T_{uuid}" # In pandas < 1.1.0 # translated_style["cellstyle"] has the following shape: # [ # { # "props": [["color", " black"], ["background-color", "orange"], ["", ""]], # "selector": "row0_col0" # } # ... # ] # # In pandas >= 1.1.0 # translated_style["cellstyle"] has the following shape: # [ # { # "props": [("color", " black"), ("background-color", "orange"), ("", "")], # "selectors": ["row0_col0"] # } # ... # ] if style_type == "table_styles" or ( style_type == "cell_style" and type_util.is_pandas_version_less_than("1.1.0") ): cell_selectors = [style["selector"]] else: cell_selectors = style["selectors"] selectors = [] for cell_selector in cell_selectors: selectors.append(table_selector + separator + cell_selector) selector = ", ".join(selectors) declaration_block = "; ".join(declarations) rule_set = selector + " { " + declaration_block + " }" return rule_set def _marshall_display_values( proto: ArrowProto, df: DataFrame, styles: Mapping[str, Any] ) -> None: """Marshall pandas.Styler display values into an Arrow proto. Parameters ---------- proto : proto.Arrow Output. The protobuf for Streamlit Arrow proto. df : pandas.DataFrame A dataframe with original values. styles : dict pandas.Styler translated styles. """ new_df = _use_display_values(df, styles) proto.styler.display_values = type_util.data_frame_to_bytes(new_df) def _use_display_values(df: DataFrame, styles: Mapping[str, Any]) -> DataFrame: """Create a new pandas.DataFrame where display values are used instead of original ones. Parameters ---------- df : pandas.DataFrame A dataframe with original values. styles : dict pandas.Styler translated styles. """ import re # If values in a column are not of the same type, Arrow # serialization would fail. Thus, we need to cast all values # of the dataframe to strings before assigning them display values. new_df = df.astype(str) cell_selector_regex = re.compile(r"row(\d+)_col(\d+)") if "body" in styles: rows = styles["body"] for row in rows: for cell in row: match = cell_selector_regex.match(cell["id"]) if match: r, c = map(int, match.groups()) new_df.iat[r, c] = str(cell["display_value"]) return new_df