team-10/venv/Lib/site-packages/sympy/parsing/latex/__init__.py
2025-08-02 02:00:33 +02:00

204 lines
8.8 KiB
Python

from sympy.external import import_module
from sympy.utilities.decorator import doctest_depends_on
from re import compile as rcompile
from sympy.parsing.latex.lark import LarkLaTeXParser, TransformToSymPyExpr, parse_latex_lark # noqa
from .errors import LaTeXParsingError # noqa
IGNORE_L = r"\s*[{]*\s*"
IGNORE_R = r"\s*[}]*\s*"
NO_LEFT = r"(?<!\\left)"
BEGIN_AMS_MAT = r"\\begin{matrix}"
END_AMS_MAT = r"\\end{matrix}"
BEGIN_ARR = r"\\begin{array}{.*?}"
END_ARR = r"\\end{array}"
# begin_delim_regex: end_delim_regex
MATRIX_DELIMS = {fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\)",
fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\)",
fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\]",
fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\]",
fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\|",
fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\|",
r"\\begin{pmatrix}": r"\\end{pmatrix}",
r"\\begin{bmatrix}": r"\\end{bmatrix}",
r"\\begin{vmatrix}": r"\\end{vmatrix}",
fr"\\left\({IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\)",
fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\)",
fr"\\left\[{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\]",
fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\]",
fr"\\left\|{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\|",
fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\|"
}
MATRIX_DELIMS_INV = {v: k for k, v in MATRIX_DELIMS.items()}
# begin_delim_regex: ideal_begin_delim_representative
BEGIN_DELIM_REPR = {fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}": "\\left(\\begin{matrix}",
fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}": "(\\begin{matrix}",
fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}": "\\left[\\begin{matrix}",
fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}": "[\\begin{matrix}",
fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}": "\\left|\\begin{matrix}",
fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}": "|\\begin{matrix}",
r"\\begin{pmatrix}": "\\begin{pmatrix}",
r"\\begin{bmatrix}": "\\begin{bmatrix}",
r"\\begin{vmatrix}": "\\begin{vmatrix}",
fr"\\left\({IGNORE_L}{BEGIN_ARR}": "\\left(\\begin{array}{COLUMN_SPECIFIERS}",
fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}": "(\\begin{array}{COLUMN_SPECIFIERS}",
fr"\\left\[{IGNORE_L}{BEGIN_ARR}": "\\left[\\begin{array}{COLUMN_SPECIFIERS}",
fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}": "[\\begin{array}{COLUMN_SPECIFIERS}",
fr"\\left\|{IGNORE_L}{BEGIN_ARR}": "\\left|\\begin{array}{COLUMN_SPECIFIERS}",
fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}": "|\\begin{array}{COLUMN_SPECIFIERS}"
}
# end_delim_regex: ideal_end_delim_representative
END_DELIM_REPR = {fr"{END_AMS_MAT}{IGNORE_R}\\right\)": "\\end{matrix}\\right)",
fr"{END_AMS_MAT}{IGNORE_R}\)": "\\end{matrix})",
fr"{END_AMS_MAT}{IGNORE_R}\\right\]": "\\end{matrix}\\right]",
fr"{END_AMS_MAT}{IGNORE_R}\]": "\\end{matrix}]",
fr"{END_AMS_MAT}{IGNORE_R}\\right\|": "\\end{matrix}\\right|",
fr"{END_AMS_MAT}{IGNORE_R}\|": "\\end{matrix}|",
r"\\end{pmatrix}": "\\end{pmatrix}",
r"\\end{bmatrix}": "\\end{bmatrix}",
r"\\end{vmatrix}": "\\end{vmatrix}",
fr"{END_ARR}{IGNORE_R}\\right\)": "\\end{array}\\right)",
fr"{END_ARR}{IGNORE_R}\)": "\\end{array})",
fr"{END_ARR}{IGNORE_R}\\right\]": "\\end{array}\\right]",
fr"{END_ARR}{IGNORE_R}\]": "\\end{array}]",
fr"{END_ARR}{IGNORE_R}\\right\|": "\\end{array}\\right|",
fr"{END_ARR}{IGNORE_R}\|": "\\end{array}|"
}
def check_matrix_delimiters(latex_str):
"""Report mismatched, excess, or missing matrix delimiters."""
spans = []
for begin_delim in MATRIX_DELIMS:
end_delim = MATRIX_DELIMS[begin_delim]
p = rcompile(begin_delim)
q = rcompile(end_delim)
spans.extend([(*m.span(), m.group(),
begin_delim) for m in p.finditer(latex_str)])
spans.extend([(*m.span(), m.group(),
end_delim) for m in q.finditer(latex_str)])
spans.sort(key=(lambda x: x[0]))
if len(spans) % 2 == 1:
# Odd number of delimiters; therefore something
# is wrong. We do not complain yet; let's see if
# we can pinpoint the actual error.
spans.append((None, None, None, None))
spans = [(*x, *y) for (x, y) in zip(spans[::2], spans[1::2])]
for x in spans:
# x is supposed to be an 8-tuple of the following form:
#
# (begin_delim_span_start, begin_delim_span_end,
# begin_delim_match, begin_delim_regex,
# end_delim_span_start, end_delim_span_end,
# end_delim_match, end_delim_regex)
sellipsis = "..."
s = x[0] - 10
if s < 0:
s = 0
sellipsis = ""
eellipsis = "..."
e = x[1] + 10
if e > len(latex_str):
e = len(latex_str)
eellipsis = ""
if x[3] in END_DELIM_REPR:
err = (f"Extra '{x[2]}' at index {x[0]} or "
"missing corresponding "
f"'{BEGIN_DELIM_REPR[MATRIX_DELIMS_INV[x[3]]]}' "
f"in LaTeX string: {sellipsis}{latex_str[s:e]}"
f"{eellipsis}")
raise LaTeXParsingError(err)
if x[7] is None:
err = (f"Extra '{x[2]}' at index {x[0]} or "
"missing corresponding "
f"'{END_DELIM_REPR[MATRIX_DELIMS[x[3]]]}' "
f"in LaTeX string: {sellipsis}{latex_str[s:e]}"
f"{eellipsis}")
raise LaTeXParsingError(err)
correct_end_regex = MATRIX_DELIMS[x[3]]
sellipsis = "..." if x[0] > 0 else ""
eellipsis = "..." if x[5] < len(latex_str) else ""
if x[7] != correct_end_regex:
err = ("Expected "
f"'{END_DELIM_REPR[correct_end_regex]}' "
f"to close the '{x[2]}' at index {x[0]} but "
f"found '{x[6]}' at index {x[4]} of LaTeX "
f"string instead: {sellipsis}{latex_str[x[0]:x[5]]}"
f"{eellipsis}")
raise LaTeXParsingError(err)
__doctest_requires__ = {('parse_latex',): ['antlr4', 'lark']}
@doctest_depends_on(modules=('antlr4', 'lark'))
def parse_latex(s, strict=False, backend="antlr"):
r"""Converts the input LaTeX string ``s`` to a SymPy ``Expr``.
Parameters
==========
s : str
The LaTeX string to parse. In Python source containing LaTeX,
*raw strings* (denoted with ``r"``, like this one) are preferred,
as LaTeX makes liberal use of the ``\`` character, which would
trigger escaping in normal Python strings.
backend : str, optional
Currently, there are two backends supported: ANTLR, and Lark.
The default setting is to use the ANTLR backend, which can be
changed to Lark if preferred.
Use ``backend="antlr"`` for the ANTLR-based parser, and
``backend="lark"`` for the Lark-based parser.
The ``backend`` option is case-sensitive, and must be in
all lowercase.
strict : bool, optional
This option is only available with the ANTLR backend.
If True, raise an exception if the string cannot be parsed as
valid LaTeX. If False, try to recover gracefully from common
mistakes.
Examples
========
>>> from sympy.parsing.latex import parse_latex
>>> expr = parse_latex(r"\frac {1 + \sqrt {\a}} {\b}")
>>> expr
(sqrt(a) + 1)/b
>>> expr.evalf(4, subs=dict(a=5, b=2))
1.618
>>> func = parse_latex(r"\int_1^\alpha \dfrac{\mathrm{d}t}{t}", backend="lark")
>>> func.evalf(subs={"alpha": 2})
0.693147180559945
"""
check_matrix_delimiters(s)
if backend == "antlr":
_latex = import_module(
'sympy.parsing.latex._parse_latex_antlr',
import_kwargs={'fromlist': ['X']})
if _latex is not None:
return _latex.parse_latex(s, strict)
elif backend == "lark":
return parse_latex_lark(s)
else:
raise NotImplementedError(f"Using the '{backend}' backend in the LaTeX" \
" parser is not supported.")