200 lines
6.1 KiB
Python
200 lines
6.1 KiB
Python
![]() |
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
import re
|
||
|
import textwrap
|
||
|
from typing import TYPE_CHECKING, Final
|
||
|
|
||
|
from streamlit.errors import StreamlitAPIException
|
||
|
|
||
|
if TYPE_CHECKING:
|
||
|
from streamlit.type_util import SupportsStr
|
||
|
|
||
|
# Matches strings made up exclusively of "plain" characters: ASCII letters,
# digits, underscore, ampersand, hyphen, period and the space character.
# Despite the name, the allowed set is therefore slightly wider than strictly
# alphanumeric characters.
_ALPHANUMERIC_CHAR_REGEX: Final = re.compile(
    r"^[a-zA-Z0-9_&\-\. ]+$",
)
|
||
|
|
||
|
|
||
|
def clean_text(text: SupportsStr) -> str:
    """Stringify ``text``, remove common indentation, and trim the ends.

    The object is converted with ``str()``, passed through
    ``textwrap.dedent`` to drop indentation shared by all lines, and finally
    stripped of leading and trailing whitespace.
    """
    as_string = str(text)
    dedented = textwrap.dedent(as_string)
    return dedented.strip()
|
||
|
|
||
|
|
||
|
def _contains_special_chars(text: str) -> bool:
    """Report whether ``text`` has characters outside the plain-character set.

    A string counts as "plain" when ``_ALPHANUMERIC_CHAR_REGEX`` matches it,
    i.e. it consists only of ASCII letters, digits, underscore, ampersand,
    hyphen, period and space. An empty (falsy) input is never considered to
    contain special characters.
    """
    if not text:
        return False
    return _ALPHANUMERIC_CHAR_REGEX.match(text) is None
|
||
|
|
||
|
|
||
|
def is_emoji(text: str) -> bool:
    """Return True if ``text`` is an entry of ``ALL_EMOJIS``.

    Any U+FE0F code point (the emoji variation selector) is removed from
    ``text`` before the membership test.
    """
    # Cheap pre-check: a string made only of plain characters cannot be an
    # emoji, so the emoji table does not need to be imported at all.
    if _contains_special_chars(text):
        from streamlit.emojis import ALL_EMOJIS

        without_variation_selector = text.replace("\U0000fe0f", "")
        return without_variation_selector in ALL_EMOJIS

    return False
|
||
|
|
||
|
|
||
|
def is_material_icon(maybe_icon: str) -> bool:
    """Return True if ``maybe_icon`` is a member of ``ALL_MATERIAL_ICONS``.

    The argument is compared as given, without any surrounding shortcode
    syntax being added or removed.
    """
    # Imported inside the function, as in the rest of this module for the
    # large lookup tables.
    from streamlit.material_icon_names import ALL_MATERIAL_ICONS as known_icons

    return maybe_icon in known_icons
|
||
|
|
||
|
|
||
|
def validate_icon_or_emoji(icon: str | None) -> str:
    """Validate ``icon`` as either a Material icon shortcode or an emoji.

    Values starting with ``":material"`` are handed to
    ``validate_material_icon``; everything else, including ``None``, is
    handed to ``validate_emoji``. The chosen validator's return value is
    passed through, and so is any exception it raises.
    """
    if icon is None or not icon.startswith(":material"):
        return validate_emoji(icon)
    return validate_material_icon(icon)
|
||
|
|
||
|
|
||
|
def validate_emoji(maybe_emoji: str | None) -> str:
    """Validate an emoji and return it unchanged if valid.

    Returns
    -------
    str
        An empty string when ``maybe_emoji`` is ``None``; otherwise the input
        itself, provided ``is_emoji`` accepts it.

    Raises
    ------
    StreamlitAPIException
        If ``maybe_emoji`` is not ``None`` and ``is_emoji`` rejects it.
    """
    if maybe_emoji is None:
        return ""

    if not is_emoji(maybe_emoji):
        raise StreamlitAPIException(
            f'The value "{maybe_emoji}" is not a valid emoji. Shortcodes are not allowed, '
            "please use a single character instead."
        )

    return maybe_emoji
|
||
|
|
||
|
|
||
|
def validate_material_icon(maybe_material_icon: str | None) -> str:
    """Validate a Material icon shortcode and return it in normalized form.

    The input must look like ``:<pack>/<icon_name>:``, optionally surrounded
    by whitespace. The pack has to be a supported one (currently only
    ``material``) and the icon name has to pass ``is_material_icon``.

    Returns
    -------
    str
        An empty string when the input is ``None``; otherwise the shortcode
        rebuilt as ``:<pack>/<icon_name>:`` without surrounding whitespace.

    Raises
    ------
    StreamlitAPIException
        If the input does not have the shortcode shape, names an unsupported
        pack, or names an unknown icon.
    """
    if maybe_material_icon is None:
        return ""

    allowed_packs = ("material",)

    # Since our markdown processing needs to change the `/` to `_` in order to
    # correctly render the icon, a zero-width space is inserted before the `/`
    # in error messages to avoid that transformation there.
    zero_width_space = "\u200b"

    parsed = re.match(r"^\s*:(.+)\/(.+):\s*$", maybe_material_icon)
    if parsed is None:
        shown_value = maybe_material_icon.replace("/", zero_width_space + "/")
        raise StreamlitAPIException(
            f'The value `"{shown_value}"` is '
            "not a valid Material icon. Please use a Material icon shortcode like "
            f"**`:material{zero_width_space}/thumb_up:`**"
        )

    pack_name, icon_name = parsed.groups()

    # Evaluated left to right with short-circuiting, so the icon table is only
    # consulted for a supported pack and a non-empty name.
    is_known_icon = (
        pack_name in allowed_packs and bool(icon_name) and is_material_icon(icon_name)
    )
    if not is_known_icon:
        shown_value = maybe_material_icon.replace("/", zero_width_space + "/")
        raise StreamlitAPIException(
            f'The value `"{shown_value}"` is not a '
            "valid Material icon. Please use a Material icon shortcode like "
            f"**`:material{zero_width_space}/thumb_up:`**."
        )

    return f":{pack_name}/{icon_name}:"
|
||
|
|
||
|
|
||
|
def extract_leading_emoji(text: str) -> tuple[str, str]:
    """Split ``text`` into an emoji and the remaining string.

    The split is performed by ``EMOJI_EXTRACTION_REGEX``: its first capture
    group is returned as the emoji and its second as the rest of the string
    (presumably without the separator between the two; the pattern itself is
    defined in ``streamlit.emojis``).

    Returns
    -------
    tuple[str, str]
        ``(emoji, rest)`` when the pattern is found, otherwise ``("", text)``.
    """
    nothing_extracted = ("", text)

    # A string consisting only of plain characters (letters, digits and a few
    # punctuation marks) cannot contain an emoji, so skip the regex entirely.
    if not _contains_special_chars(text):
        return nothing_extracted

    from streamlit.emojis import EMOJI_EXTRACTION_REGEX

    found = re.search(EMOJI_EXTRACTION_REGEX, text)
    if found is None:
        return nothing_extracted
    return found.group(1), found.group(2)
|
||
|
|
||
|
|
||
|
def max_char_sequence(string: str, char: str) -> int:
    """Return the length of the longest unbroken run of ``char`` in ``string``.

    Each character of ``string`` is compared against ``char`` for equality,
    so the result is 0 when ``char`` never occurs (which includes an empty
    ``string``).
    """
    longest_run = 0
    run_length = 0
    for current in string:
        # Extend the running streak on a hit, restart it on a miss.
        run_length = run_length + 1 if current == char else 0
        if run_length > longest_run:
            longest_run = run_length
    return longest_run
|
||
|
|
||
|
|
||
|
# Byte values regarded as text: the control characters BEL, BS, TAB, LF, FF,
# CR and ESC, plus every byte from 0x20 through 0xFF except DEL (0x7F).
TEXTCHARS: Final = bytearray(
    {0x07, 0x08, 0x09, 0x0A, 0x0C, 0x0D, 0x1B}
    | {byte for byte in range(0x20, 0x100) if byte != 0x7F}
)
|
||
|
|
||
|
|
||
|
def is_binary_string(inp: bytes) -> bool:
    """Guess whether ``inp`` is binary data rather than text.

    Returns True when ``inp`` contains at least one byte that is not listed
    in ``TEXTCHARS``, and False when every byte is a text byte (an empty
    input therefore yields False).
    """
    # From https://stackoverflow.com/a/7392391
    # Deleting all text bytes leaves only the bytes that look binary.
    non_text_bytes = inp.translate(None, TEXTCHARS)
    return len(non_text_bytes) > 0
|
||
|
|
||
|
|
||
|
def simplify_number(num: int) -> str:
    """Simplify a number into a short human-readable string.

    The value is rounded to two significant digits and then divided by 1000
    repeatedly, with a suffix recording how many divisions took place
    (``k``, ``m``, ``b``, ``t``). Values too large for the biggest suffix
    keep that suffix and show a correspondingly larger number instead of
    raising ``IndexError``.

    Examples
    --------
    1234 -> "1.2k"
    1000000 -> "1m"
    10**15 -> "1000t"
    """
    suffixes = ("", "k", "m", "b", "t")

    # Round to two significant digits first so the output stays short.
    scaled = float(f"{num:.2g}")

    magnitude = 0
    # Never scale past the largest available suffix; without this bound any
    # |num| >= 1e15 would index beyond the end of ``suffixes``.
    while abs(scaled) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        scaled /= 1000.0

    # Fixed-point formatting avoids exponent notation; trailing zeros and a
    # dangling decimal point are trimmed afterwards.
    digits = f"{scaled:f}".rstrip("0").rstrip(".")
    return f"{digits}{suffixes[magnitude]}"
|
||
|
|
||
|
|
||
|
# Matches strings shaped like a default object repr, e.g.
# "<foo blarg at 0x15ee6f9a0>": an opening "<", a name starting with a letter
# or underscore, " at 0x" followed by lowercase hex digits, and a closing ">".
_OBJ_MEM_ADDRESS: Final = re.compile(
    r"^\<[a-zA-Z_]+[a-zA-Z0-9<>._ ]* at 0x[0-9a-f]+\>$",
)
|
||
|
|
||
|
|
||
|
def is_mem_address_str(string: str) -> bool:
    """Return True if ``string`` looks like ``<foo blarg at 0x15ee6f9a0>``.

    The check is a match of the whole string against ``_OBJ_MEM_ADDRESS``.
    """
    return _OBJ_MEM_ADDRESS.match(string) is not None
|
||
|
|
||
|
|
||
|
def to_snake_case(camel_case_str: str) -> str:
    """Convert an UpperCamelCase or lowerCamelCase string to snake_case.

    Examples
    --------
    fooBar -> foo_bar
    BazBang -> baz_bang
    HTTPResponse -> http_response

    """
    join_with_underscore = r"\1_\2"

    # Pass 1: put an underscore in front of every capitalized word (an
    # uppercase letter followed by lowercase letters) that has a character
    # before it.
    words_separated = re.sub(
        "(.)([A-Z][a-z]+)", join_with_underscore, camel_case_str
    )

    # Pass 2: split any remaining lowercase-or-digit / uppercase boundary.
    fully_separated = re.sub(
        "([a-z0-9])([A-Z])", join_with_underscore, words_separated
    )

    return fully_separated.lower()
|