# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import annotations import re import textwrap from typing import TYPE_CHECKING, Final from streamlit.errors import StreamlitAPIException if TYPE_CHECKING: from streamlit.type_util import SupportsStr _ALPHANUMERIC_CHAR_REGEX: Final = re.compile(r"^[a-zA-Z0-9_&\-\. ]+$") def clean_text(text: SupportsStr) -> str: """Convert an object to text, dedent it, and strip whitespace.""" return textwrap.dedent(str(text)).strip() def _contains_special_chars(text: str) -> bool: """Check if a string contains any special chars. Special chars in that case are all chars that are not alphanumeric, underscore, hyphen or whitespace. """ return re.match(_ALPHANUMERIC_CHAR_REGEX, text) is None if text else False def is_emoji(text: str) -> bool: """Check if input string is a valid emoji.""" if not _contains_special_chars(text): return False from streamlit.emojis import ALL_EMOJIS return text.replace("\U0000fe0f", "") in ALL_EMOJIS def is_material_icon(maybe_icon: str) -> bool: """Check if input string is a valid Material icon.""" from streamlit.material_icon_names import ALL_MATERIAL_ICONS return maybe_icon in ALL_MATERIAL_ICONS def validate_icon_or_emoji(icon: str | None) -> str: """Validate an icon or emoji and return it in normalized format if valid.""" if icon is not None and icon.startswith(":material"): return validate_material_icon(icon) return validate_emoji(icon) def validate_emoji(maybe_emoji: str | None) -> str: if maybe_emoji is None: return "" if is_emoji(maybe_emoji): return maybe_emoji raise StreamlitAPIException( f'The value "{maybe_emoji}" is not a valid emoji. Shortcodes are not allowed, ' "please use a single character instead." ) def validate_material_icon(maybe_material_icon: str | None) -> str: """Validate a Material icon shortcode and return the icon in normalized format if valid. """ supported_icon_packs = [ "material", ] if maybe_material_icon is None: return "" icon_regex = r"^\s*:(.+)\/(.+):\s*$" icon_match = re.match(icon_regex, maybe_material_icon) # Since our markdown processing needs to change the `/` to `_` in order to # correctly render the icon, we need to add a zero-width space before the # `/` to avoid this transformation here. invisible_white_space = "\u200b" if not icon_match: raise StreamlitAPIException( f'The value `"{maybe_material_icon.replace("/", invisible_white_space + "/")}"` is ' "not a valid Material icon. Please use a Material icon shortcode like " f"**`:material{invisible_white_space}/thumb_up:`**" ) pack_name, icon_name = icon_match.groups() if ( pack_name not in supported_icon_packs or not icon_name or not is_material_icon(icon_name) ): raise StreamlitAPIException( f'The value `"{maybe_material_icon.replace("/", invisible_white_space + "/")}"` is not a ' "valid Material icon. Please use a Material icon shortcode like " f"**`:material{invisible_white_space}/thumb_up:`**." ) return f":{pack_name}/{icon_name}:" def extract_leading_emoji(text: str) -> tuple[str, str]: """Return a tuple containing the first emoji found in the given string and the rest of the string (minus an optional separator between the two). """ if not _contains_special_chars(text): # If the string only contains basic alphanumerical chars and/or # underscores, hyphen & whitespaces, then it's guaranteed that there # is no emoji in the string. return "", text from streamlit.emojis import EMOJI_EXTRACTION_REGEX re_match = re.search(EMOJI_EXTRACTION_REGEX, text) if re_match is None: return "", text return re_match.group(1), re_match.group(2) def max_char_sequence(string: str, char: str) -> int: """Returns the count of the max sequence of a given char in a string.""" max_sequence = 0 current_sequence = 0 for c in string: if c == char: current_sequence += 1 max_sequence = max(max_sequence, current_sequence) else: current_sequence = 0 return max_sequence TEXTCHARS: Final = bytearray( {7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F} ) def is_binary_string(inp: bytes) -> bool: """Guess if an input bytesarray can be encoded as a string.""" # From https://stackoverflow.com/a/7392391 return bool(inp.translate(None, TEXTCHARS)) def simplify_number(num: int) -> str: """Simplifies number into Human readable format, returns str.""" num_converted = float(f"{num:.2g}") magnitude = 0 while abs(num_converted) >= 1000: magnitude += 1 num_converted /= 1000.0 return "{}{}".format( f"{num_converted:f}".rstrip("0").rstrip("."), ["", "k", "m", "b", "t"][magnitude], ) _OBJ_MEM_ADDRESS: Final = re.compile( r"^\<[a-zA-Z_]+[a-zA-Z0-9<>._ ]* at 0x[0-9a-f]+\>$" ) def is_mem_address_str(string: str) -> bool: """Returns True if the string looks like .""" return bool(_OBJ_MEM_ADDRESS.match(string)) def to_snake_case(camel_case_str: str) -> str: """Converts UpperCamelCase and lowerCamelCase to snake_case. Examples -------- fooBar -> foo_bar BazBang -> baz_bang """ s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", camel_case_str) return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower()