import inspect
import numpy as np
from scipy.linalg import qr
from ..utils import get_arrays_tol
TINY = np.finfo(float).tiny  # smallest positive normal floating-point number
EPS = np.finfo(float).eps  # machine epsilon (unit roundoff, about 2.2e-16)
def tangential_byrd_omojokun(grad, hess_prod, xl, xu, delta, debug, **kwargs):
r"""
Minimize approximately a quadratic function subject to bound constraints in
a trust region.
This function solves approximately
.. math::
\min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
s^{\mathsf{T}} H s \quad \text{s.t.} \quad
\left\{ \begin{array}{l}
l \le s \le u\\
\lVert s \rVert \le \Delta,
\end{array} \right.
using an active-set variation of the truncated conjugate gradient method.
Parameters
----------
grad : `numpy.ndarray`, shape (n,)
Gradient :math:`g` as shown above.
hess_prod : callable
Product of the Hessian matrix :math:`H` with any vector.
``hess_prod(s) -> `numpy.ndarray`, shape (n,)``
returns the product :math:`H s`.
xl : `numpy.ndarray`, shape (n,)
Lower bounds :math:`l` as shown above.
xu : `numpy.ndarray`, shape (n,)
Upper bounds :math:`u` as shown above.
delta : float
Trust-region radius :math:`\Delta` as shown above.
debug : bool
        Whether to perform debugging tests during the execution.
Returns
-------
`numpy.ndarray`, shape (n,)
Approximate solution :math:`s`.
Other Parameters
----------------
improve_tcg : bool, optional
If True, a solution generated by the truncated conjugate gradient
method that is on the boundary of the trust region is improved by
moving around the trust-region boundary on the two-dimensional space
spanned by the solution and the gradient of the quadratic function at
the solution (default is True).
Notes
-----
This function implements Algorithm 6.2 of [1]_. It is assumed that the
origin is feasible with respect to the bound constraints and that `delta`
is finite and positive.
References
----------
.. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
and Software*. PhD thesis, Department of Applied Mathematics, The Hong
Kong Polytechnic University, Hong Kong, China, 2022. URL:
https://theses.lib.polyu.edu.hk/handle/200/12294.
"""
if debug:
assert isinstance(grad, np.ndarray) and grad.ndim == 1
assert inspect.signature(hess_prod).bind(grad)
assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
assert isinstance(delta, float)
assert isinstance(debug, bool)
tol = get_arrays_tol(xl, xu)
assert np.all(xl <= tol)
assert np.all(xu >= -tol)
assert np.isfinite(delta) and delta > 0.0
xl = np.minimum(xl, 0.0)
xu = np.maximum(xu, 0.0)
# Copy the arrays that may be modified by the code below.
n = grad.size
grad = np.copy(grad)
grad_orig = np.copy(grad)
# Calculate the initial active set.
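    # A bound is removed from the free set at the start only if the origin
    # lies on it (xl[i] == 0 or xu[i] == 0) and the steepest-descent direction
    # -grad does not move strictly into the interior of [xl[i], xu[i]].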
free_bd = ((xl < 0.0) | (grad < 0.0)) & ((xu > 0.0) | (grad > 0.0))
# Set the initial iterate and the initial search direction.
step = np.zeros_like(grad)
sd = np.zeros_like(step)
sd[free_bd] = -grad[free_bd]
k = 0
reduct = 0.0
boundary_reached = False
while k < np.count_nonzero(free_bd):
# Stop the computations if sd is not a descent direction.
grad_sd = grad @ sd
if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
break
# Set alpha_tr to the step size for the trust-region constraint.
try:
alpha_tr = _alpha_tr(step, sd, delta)
except ZeroDivisionError:
break
# Stop the computations if a step along sd is expected to give a
# relatively small reduction in the objective function.
if -alpha_tr * grad_sd <= 1e-8 * reduct:
break
# Set alpha_quad to the step size for the minimization problem.
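        # Along sd the objective varies as alpha * grad_sd + 0.5 * alpha**2 *
        # curv_sd, so its unconstrained minimizer is -grad_sd / curv_sd when
        # the curvature is safely positive; otherwise, the one-dimensional
        # problem is unbounded below and alpha_quad is set to infinity.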
hess_sd = hess_prod(sd)
curv_sd = sd @ hess_sd
if curv_sd > TINY * abs(grad_sd):
alpha_quad = max(-grad_sd / curv_sd, 0.0)
else:
alpha_quad = np.inf
# Stop the computations if the reduction in the objective function
# provided by an unconstrained step is small.
alpha = min(alpha_tr, alpha_quad)
if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
break
# Set alpha_bd to the step size for the bound constraints.
i_xl = (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
i_xu = (xu < np.inf) & (sd > TINY * np.abs(xu - step))
all_alpha_xl = np.full_like(step, np.inf)
all_alpha_xu = np.full_like(step, np.inf)
all_alpha_xl[i_xl] = np.maximum(
(xl[i_xl] - step[i_xl]) / sd[i_xl],
0.0,
)
all_alpha_xu[i_xu] = np.maximum(
(xu[i_xu] - step[i_xu]) / sd[i_xu],
0.0,
)
alpha_xl = np.min(all_alpha_xl)
alpha_xu = np.min(all_alpha_xu)
alpha_bd = min(alpha_xl, alpha_xu)
# Update the iterate.
alpha = min(alpha, alpha_bd)
if alpha > 0.0:
step[free_bd] = np.clip(
step[free_bd] + alpha * sd[free_bd],
xl[free_bd],
xu[free_bd],
)
grad += alpha * hess_sd
reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
if alpha < min(alpha_tr, alpha_bd):
# The current iteration is a conjugate gradient iteration. Update
# the search direction so that it is conjugate (with respect to H)
# to all the previous search directions.
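            # The coefficient beta = (grad @ hess_sd) / (sd @ hess_sd) is the
            # standard choice that makes the new direction -grad + beta * sd
            # conjugate (with respect to H) to the previous one, restricted
            # here to the free variables.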
beta = (grad[free_bd] @ hess_sd[free_bd]) / curv_sd
sd[free_bd] = beta * sd[free_bd] - grad[free_bd]
sd[~free_bd] = 0.0
k += 1
elif alpha < alpha_tr:
# The iterate is restricted by a bound constraint. Add this bound
# constraint to the active set, and restart the calculations.
if alpha_xl <= alpha:
i_new = np.argmin(all_alpha_xl)
step[i_new] = xl[i_new]
else:
i_new = np.argmin(all_alpha_xu)
step[i_new] = xu[i_new]
free_bd[i_new] = False
sd[free_bd] = -grad[free_bd]
sd[~free_bd] = 0.0
k = 0
else:
# The current iterate is on the trust-region boundary. Add all the
# active bounds to the working set to prepare for the improvement
# of the solution, and stop the iterations.
if alpha_xl <= alpha:
i_new = _argmin(all_alpha_xl)
step[i_new] = xl[i_new]
free_bd[i_new] = False
if alpha_xu <= alpha:
i_new = _argmin(all_alpha_xu)
step[i_new] = xu[i_new]
free_bd[i_new] = False
boundary_reached = True
break
# Attempt to improve the solution on the trust-region boundary.
if kwargs.get("improve_tcg", True) and boundary_reached:
step_base = np.copy(step)
step_comparator = grad_orig @ step_base + 0.5 * step_base @ hess_prod(
step_base
)
while np.count_nonzero(free_bd) > 0:
# Check whether a substantial reduction in the objective function
# is possible, and set the search direction.
step_sq = step[free_bd] @ step[free_bd]
grad_sq = grad[free_bd] @ grad[free_bd]
grad_step = grad[free_bd] @ step[free_bd]
grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
sd[~free_bd] = 0.0
if grad_sd >= -1e-8 * reduct or np.any(
grad_sd >= -TINY * np.abs(sd[free_bd])
):
break
sd[free_bd] /= -grad_sd
# Calculate an upper bound for the tangent of half the angle theta
# of this alternative iteration. The step will be updated as:
# step = cos(theta) * step + sin(theta) * sd.
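            # With t = tan(theta / 2), cos(theta) = (1 - t**2) / (1 + t**2)
            # and sin(theta) = 2 * t / (1 + t**2), so the i-th lower bound
            # yields the quadratic inequality
            #     (step[i] - xl[i]) + 2 * sd[i] * t
            #         - (step[i] + xl[i]) * t**2 >= 0,
            # whose critical value of t is (step[i] - xl[i]) divided by
            # sqrt(step[i]**2 + sd[i]**2 - xl[i]**2) - sd[i] when the radicand
            # is positive (no restriction arises otherwise). The upper bounds
            # give the symmetric formula with + sd[i] in the denominator.
            # These are the quantities computed below.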
temp_xl = np.zeros(n)
temp_xu = np.zeros(n)
temp_xl[free_bd] = (
step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
)
temp_xu[free_bd] = (
step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
)
temp_xl[temp_xl > 0.0] = (
np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
)
temp_xu[temp_xu > 0.0] = (
np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
)
dist_xl = np.maximum(step - xl, 0.0)
dist_xu = np.maximum(xu - step, 0.0)
i_xl = temp_xl > TINY * dist_xl
i_xu = temp_xu > TINY * dist_xu
all_t_xl = np.ones(n)
all_t_xu = np.ones(n)
all_t_xl[i_xl] = np.minimum(
all_t_xl[i_xl],
dist_xl[i_xl] / temp_xl[i_xl],
)
all_t_xu[i_xu] = np.minimum(
all_t_xu[i_xu],
dist_xu[i_xu] / temp_xu[i_xu],
)
t_xl = np.min(all_t_xl)
t_xu = np.min(all_t_xu)
t_bd = min(t_xl, t_xu)
# Calculate some curvature information.
hess_step = hess_prod(step)
hess_sd = hess_prod(sd)
curv_step = step @ hess_step
curv_sd = sd @ hess_sd
curv_step_sd = step @ hess_sd
# For a range of equally spaced values of tan(0.5 * theta),
# calculate the reduction in the objective function that would be
# obtained by accepting the corresponding angle.
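            # Using 1 - cos(theta) = t * sin(theta) with t = tan(theta / 2),
            # the reduction in the objective function produced by the rotation
            # is
            #     sin(theta) * (t * grad_step - grad_sd
            #                   - sin(theta) * (0.5 * t**2 * curv_step
            #                                   - t * curv_step_sd
            #                                   + 0.5 * curv_sd)).
            # The expression below is an algebraically equivalent
            # rearrangement of this formula.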
n_samples = 20
n_samples = int((n_samples - 3) * t_bd + 3)
t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
all_reduct = sin_values * (
grad_step * t_samples
- grad_sd
- t_samples * curv_step
+ sin_values
* (t_samples * curv_step_sd - 0.5 * (curv_sd - curv_step))
)
if np.all(all_reduct <= 0.0):
# No reduction in the objective function is obtained.
break
# Accept the angle that provides the largest reduction in the
# objective function, and update the iterate.
i_max = np.argmax(all_reduct)
cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
1.0 + t_samples[i_max] ** 2.0
)
step[free_bd] = (
cos_value * step[free_bd] + sin_values[i_max] * sd[free_bd]
)
grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
reduct += all_reduct[i_max]
# If the above angle is restricted by bound constraints, add them
# to the working set, and restart the alternative iteration.
# Otherwise, the calculations are terminated.
if t_bd < 1.0 and i_max == n_samples - 1:
if t_xl <= t_bd:
i_new = _argmin(all_t_xl)
step[i_new] = xl[i_new]
free_bd[i_new] = False
if t_xu <= t_bd:
i_new = _argmin(all_t_xu)
step[i_new] = xu[i_new]
free_bd[i_new] = False
else:
break
# Ensure that the alternative iteration improves the objective
# function.
if grad_orig @ step + 0.5 * step @ hess_prod(step) > step_comparator:
step = step_base
if debug:
assert np.all(xl <= step)
assert np.all(step <= xu)
assert np.linalg.norm(step) < 1.1 * delta
return step
def constrained_tangential_byrd_omojokun(
grad,
hess_prod,
xl,
xu,
aub,
bub,
aeq,
delta,
debug,
**kwargs,
):
r"""
Minimize approximately a quadratic function subject to bound and linear
constraints in a trust region.
This function solves approximately
.. math::
\min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
s^{\mathsf{T}} H s \quad \text{s.t.} \quad
\left\{ \begin{array}{l}
l \le s \le u,\\
A_{\scriptscriptstyle I} s \le b_{\scriptscriptstyle I},\\
A_{\scriptscriptstyle E} s = 0,\\
\lVert s \rVert \le \Delta,
\end{array} \right.
using an active-set variation of the truncated conjugate gradient method.
Parameters
----------
grad : `numpy.ndarray`, shape (n,)
Gradient :math:`g` as shown above.
hess_prod : callable
Product of the Hessian matrix :math:`H` with any vector.
``hess_prod(s) -> `numpy.ndarray`, shape (n,)``
returns the product :math:`H s`.
xl : `numpy.ndarray`, shape (n,)
Lower bounds :math:`l` as shown above.
xu : `numpy.ndarray`, shape (n,)
Upper bounds :math:`u` as shown above.
aub : `numpy.ndarray`, shape (m_linear_ub, n)
Coefficient matrix :math:`A_{\scriptscriptstyle I}` as shown above.
bub : `numpy.ndarray`, shape (m_linear_ub,)
Right-hand side :math:`b_{\scriptscriptstyle I}` as shown above.
aeq : `numpy.ndarray`, shape (m_linear_eq, n)
Coefficient matrix :math:`A_{\scriptscriptstyle E}` as shown above.
delta : float
Trust-region radius :math:`\Delta` as shown above.
debug : bool
        Whether to perform debugging tests during the execution.
Returns
-------
`numpy.ndarray`, shape (n,)
Approximate solution :math:`s`.
Other Parameters
----------------
improve_tcg : bool, optional
If True, a solution generated by the truncated conjugate gradient
method that is on the boundary of the trust region is improved by
moving around the trust-region boundary on the two-dimensional space
spanned by the solution and the gradient of the quadratic function at
the solution (default is True).
Notes
-----
This function implements Algorithm 6.3 of [1]_. It is assumed that the
origin is feasible with respect to the bound and linear constraints, and
that `delta` is finite and positive.
References
----------
.. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
and Software*. PhD thesis, Department of Applied Mathematics, The Hong
Kong Polytechnic University, Hong Kong, China, 2022. URL:
https://theses.lib.polyu.edu.hk/handle/200/12294.
"""
if debug:
assert isinstance(grad, np.ndarray) and grad.ndim == 1
assert inspect.signature(hess_prod).bind(grad)
assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
assert (
isinstance(aub, np.ndarray)
and aub.ndim == 2
and aub.shape[1] == grad.size
)
assert (
isinstance(bub, np.ndarray)
and bub.ndim == 1
and bub.size == aub.shape[0]
)
assert (
isinstance(aeq, np.ndarray)
and aeq.ndim == 2
and aeq.shape[1] == grad.size
)
assert isinstance(delta, float)
assert isinstance(debug, bool)
tol = get_arrays_tol(xl, xu)
assert np.all(xl <= tol)
assert np.all(xu >= -tol)
assert np.all(bub >= -tol)
assert np.isfinite(delta) and delta > 0.0
xl = np.minimum(xl, 0.0)
xu = np.maximum(xu, 0.0)
bub = np.maximum(bub, 0.0)
# Copy the arrays that may be modified by the code below.
n = grad.size
grad = np.copy(grad)
grad_orig = np.copy(grad)
# Calculate the initial active set.
free_xl = (xl < 0.0) | (grad < 0.0)
free_xu = (xu > 0.0) | (grad > 0.0)
free_ub = (bub > 0.0) | (aub @ grad > 0.0)
n_act, q = qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub)
# Set the initial iterate and the initial search direction.
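    # The trailing columns q[:, n_act:] form an orthonormal basis of the null
    # space of the working-set constraint gradients, so the initial search
    # direction below is the steepest-descent direction projected onto that
    # null space.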
step = np.zeros_like(grad)
sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
resid = np.copy(bub)
k = 0
reduct = 0.0
boundary_reached = False
while k < n - n_act:
# Stop the computations if sd is not a descent direction.
grad_sd = grad @ sd
if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
break
# Set alpha_tr to the step size for the trust-region constraint.
try:
alpha_tr = _alpha_tr(step, sd, delta)
except ZeroDivisionError:
break
# Stop the computations if a step along sd is expected to give a
# relatively small reduction in the objective function.
if -alpha_tr * grad_sd <= 1e-8 * reduct:
break
# Set alpha_quad to the step size for the minimization problem.
hess_sd = hess_prod(sd)
curv_sd = sd @ hess_sd
if curv_sd > TINY * abs(grad_sd):
alpha_quad = max(-grad_sd / curv_sd, 0.0)
else:
alpha_quad = np.inf
# Stop the computations if the reduction in the objective function
# provided by an unconstrained step is small.
alpha = min(alpha_tr, alpha_quad)
if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
break
# Set alpha_bd to the step size for the bound constraints.
i_xl = free_xl & (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
i_xu = free_xu & (xu < np.inf) & (sd > TINY * np.abs(xu - step))
all_alpha_xl = np.full_like(step, np.inf)
all_alpha_xu = np.full_like(step, np.inf)
all_alpha_xl[i_xl] = np.maximum(
(xl[i_xl] - step[i_xl]) / sd[i_xl],
0.0,
)
all_alpha_xu[i_xu] = np.maximum(
(xu[i_xu] - step[i_xu]) / sd[i_xu],
0.0,
)
alpha_xl = np.min(all_alpha_xl)
alpha_xu = np.min(all_alpha_xu)
alpha_bd = min(alpha_xl, alpha_xu)
# Set alpha_ub to the step size for the linear constraints.
aub_sd = aub @ sd
i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
all_alpha_ub = np.full_like(bub, np.inf)
all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
alpha_ub = np.min(all_alpha_ub, initial=np.inf)
# Update the iterate.
alpha = min(alpha, alpha_bd, alpha_ub)
if alpha > 0.0:
step = np.clip(step + alpha * sd, xl, xu)
grad += alpha * hess_sd
resid = np.maximum(0.0, resid - alpha * aub_sd)
reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
if alpha < min(alpha_tr, alpha_bd, alpha_ub):
# The current iteration is a conjugate gradient iteration. Update
# the search direction so that it is conjugate (with respect to H)
# to all the previous search directions.
grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
beta = (grad_proj @ hess_sd) / curv_sd
sd = beta * sd - grad_proj
k += 1
elif alpha < alpha_tr:
# The iterate is restricted by a bound/linear constraint. Add this
# constraint to the active set, and restart the calculations.
if alpha_xl <= alpha:
i_new = np.argmin(all_alpha_xl)
step[i_new] = xl[i_new]
free_xl[i_new] = False
elif alpha_xu <= alpha:
i_new = np.argmin(all_alpha_xu)
step[i_new] = xu[i_new]
free_xu[i_new] = False
else:
i_new = np.argmin(all_alpha_ub)
free_ub[i_new] = False
n_act, q = qr_tangential_byrd_omojokun(
aub,
aeq,
free_xl,
free_xu,
free_ub,
)
sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
k = 0
else:
# The current iterate is on the trust-region boundary. Add all the
# active bound/linear constraints to the working set to prepare for
# the improvement of the solution, and stop the iterations.
if alpha_xl <= alpha:
i_new = _argmin(all_alpha_xl)
step[i_new] = xl[i_new]
free_xl[i_new] = False
if alpha_xu <= alpha:
i_new = _argmin(all_alpha_xu)
step[i_new] = xu[i_new]
free_xu[i_new] = False
if alpha_ub <= alpha:
i_new = _argmin(all_alpha_ub)
free_ub[i_new] = False
n_act, q = qr_tangential_byrd_omojokun(
aub,
aeq,
free_xl,
free_xu,
free_ub,
)
boundary_reached = True
break
# Attempt to improve the solution on the trust-region boundary.
if kwargs.get("improve_tcg", True) and boundary_reached and n_act < n:
step_base = np.copy(step)
while n_act < n:
# Check whether a substantial reduction in the objective function
# is possible, and set the search direction.
step_proj = q[:, n_act:] @ (q[:, n_act:].T @ step)
grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
step_sq = step_proj @ step_proj
grad_sq = grad_proj @ grad_proj
grad_step = grad_proj @ step_proj
grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
sd = q[:, n_act:] @ (
q[:, n_act:].T @ (grad_step * step - step_sq * grad)
)
if grad_sd >= -1e-8 * reduct or np.any(
grad_sd >= -TINY * np.abs(sd)
):
break
sd /= -grad_sd
# Calculate an upper bound for the tangent of half the angle theta
# of this alternative iteration for the bound constraints. The step
# will be updated as:
# step += (cos(theta) - 1) * step_proj + sin(theta) * sd.
temp_xl = np.zeros(n)
temp_xu = np.zeros(n)
dist_xl = np.maximum(step - xl, 0.0)
dist_xu = np.maximum(xu - step, 0.0)
temp_xl[free_xl] = sd[free_xl] ** 2.0 - dist_xl[free_xl] * (
dist_xl[free_xl] - 2.0 * step_proj[free_xl]
)
temp_xu[free_xu] = sd[free_xu] ** 2.0 - dist_xu[free_xu] * (
dist_xu[free_xu] + 2.0 * step_proj[free_xu]
)
temp_xl[temp_xl > 0.0] = (
np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
)
temp_xu[temp_xu > 0.0] = (
np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
)
i_xl = temp_xl > TINY * dist_xl
i_xu = temp_xu > TINY * dist_xu
all_t_xl = np.ones(n)
all_t_xu = np.ones(n)
all_t_xl[i_xl] = np.minimum(
all_t_xl[i_xl],
dist_xl[i_xl] / temp_xl[i_xl],
)
all_t_xu[i_xu] = np.minimum(
all_t_xu[i_xu],
dist_xu[i_xu] / temp_xu[i_xu],
)
t_xl = np.min(all_t_xl)
t_xu = np.min(all_t_xu)
t_bd = min(t_xl, t_xu)
# Calculate an upper bound for the tangent of half the angle theta
# of this alternative iteration for the linear constraints.
temp_ub = np.zeros_like(resid)
aub_step = aub @ step_proj
aub_sd = aub @ sd
temp_ub[free_ub] = aub_sd[free_ub] ** 2.0 - resid[free_ub] * (
resid[free_ub] + 2.0 * aub_step[free_ub]
)
temp_ub[temp_ub > 0.0] = (
np.sqrt(temp_ub[temp_ub > 0.0]) + aub_sd[temp_ub > 0.0]
)
i_ub = temp_ub > TINY * resid
all_t_ub = np.ones_like(resid)
all_t_ub[i_ub] = np.minimum(
all_t_ub[i_ub],
resid[i_ub] / temp_ub[i_ub],
)
t_ub = np.min(all_t_ub, initial=1.0)
t_min = min(t_bd, t_ub)
# Calculate some curvature information.
hess_step = hess_prod(step_proj)
hess_sd = hess_prod(sd)
curv_step = step_proj @ hess_step
curv_sd = sd @ hess_sd
curv_step_sd = step_proj @ hess_sd
# For a range of equally spaced values of tan(0.5 * theta),
# calculate the reduction in the objective function that would be
# obtained by accepting the corresponding angle.
n_samples = 20
n_samples = int((n_samples - 3) * t_min + 3)
t_samples = np.linspace(t_min / n_samples, t_min, n_samples)
sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
all_reduct = sin_values * (
grad_step * t_samples
- grad_sd
- sin_values
* (
0.5 * t_samples**2.0 * curv_step
                    - t_samples * curv_step_sd
+ 0.5 * curv_sd
)
)
if np.all(all_reduct <= 0.0):
# No reduction in the objective function is obtained.
break
# Accept the angle that provides the largest reduction in the
# objective function, and update the iterate.
i_max = np.argmax(all_reduct)
cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
1.0 + t_samples[i_max] ** 2.0
)
step = np.clip(
step + (cos_value - 1.0) * step_proj + sin_values[i_max] * sd,
xl,
xu,
)
grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
resid = np.maximum(
0.0,
resid
- (cos_value - 1.0) * aub_step
- sin_values[i_max] * aub_sd,
)
reduct += all_reduct[i_max]
            # If the above angle is restricted by bound or linear constraints,
            # add them to the working set, and restart the alternative
            # iteration. Otherwise, the calculations are terminated.
if t_min < 1.0 and i_max == n_samples - 1:
if t_xl <= t_min:
i_new = _argmin(all_t_xl)
step[i_new] = xl[i_new]
free_xl[i_new] = False
if t_xu <= t_min:
i_new = _argmin(all_t_xu)
step[i_new] = xu[i_new]
                    free_xu[i_new] = False
if t_ub <= t_min:
i_new = _argmin(all_t_ub)
free_ub[i_new] = False
n_act, q = qr_tangential_byrd_omojokun(
aub,
aeq,
free_xl,
free_xu,
free_ub,
)
else:
break
# Ensure that the alternative iteration improves the objective
# function.
if grad_orig @ step + 0.5 * step @ hess_prod(
step
) > grad_orig @ step_base + 0.5 * step_base @ hess_prod(step_base):
step = step_base
if debug:
tol = get_arrays_tol(xl, xu)
assert np.all(xl <= step)
assert np.all(step <= xu)
assert np.all(aub @ step <= bub + tol)
assert np.all(np.abs(aeq @ step) <= tol)
assert np.linalg.norm(step) < 1.1 * delta
return step
def normal_byrd_omojokun(aub, bub, aeq, beq, xl, xu, delta, debug, **kwargs):
r"""
Minimize approximately a linear constraint violation subject to bound
constraints in a trust region.
This function solves approximately
.. math::
\min_{s \in \mathbb{R}^n} \quad \frac{1}{2} \big( \lVert \max \{
A_{\scriptscriptstyle I} s - b_{\scriptscriptstyle I}, 0 \} \rVert^2 +
\lVert A_{\scriptscriptstyle E} s - b_{\scriptscriptstyle E} \rVert^2
\big) \quad \text{s.t.}
\quad
\left\{ \begin{array}{l}
l \le s \le u,\\
\lVert s \rVert \le \Delta,
\end{array} \right.
using a variation of the truncated conjugate gradient method.
Parameters
----------
aub : `numpy.ndarray`, shape (m_linear_ub, n)
Matrix :math:`A_{\scriptscriptstyle I}` as shown above.
bub : `numpy.ndarray`, shape (m_linear_ub,)
Vector :math:`b_{\scriptscriptstyle I}` as shown above.
aeq : `numpy.ndarray`, shape (m_linear_eq, n)
Matrix :math:`A_{\scriptscriptstyle E}` as shown above.
beq : `numpy.ndarray`, shape (m_linear_eq,)
Vector :math:`b_{\scriptscriptstyle E}` as shown above.
xl : `numpy.ndarray`, shape (n,)
Lower bounds :math:`l` as shown above.
xu : `numpy.ndarray`, shape (n,)
Upper bounds :math:`u` as shown above.
delta : float
Trust-region radius :math:`\Delta` as shown above.
debug : bool
        Whether to perform debugging tests during the execution.
Returns
-------
`numpy.ndarray`, shape (n,)
Approximate solution :math:`s`.
Other Parameters
----------------
improve_tcg : bool, optional
If True, a solution generated by the truncated conjugate gradient
method that is on the boundary of the trust region is improved by
moving around the trust-region boundary on the two-dimensional space
spanned by the solution and the gradient of the quadratic function at
the solution (default is True).
Notes
-----
This function implements Algorithm 6.4 of [1]_. It is assumed that the
origin is feasible with respect to the bound constraints and that `delta`
is finite and positive.
References
----------
.. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
and Software*. PhD thesis, Department of Applied Mathematics, The Hong
Kong Polytechnic University, Hong Kong, China, 2022. URL:
https://theses.lib.polyu.edu.hk/handle/200/12294.
"""
if debug:
assert isinstance(aub, np.ndarray) and aub.ndim == 2
assert (
isinstance(bub, np.ndarray)
and bub.ndim == 1
and bub.size == aub.shape[0]
)
assert (
isinstance(aeq, np.ndarray)
and aeq.ndim == 2
and aeq.shape[1] == aub.shape[1]
)
assert (
isinstance(beq, np.ndarray)
and beq.ndim == 1
and beq.size == aeq.shape[0]
)
assert isinstance(xl, np.ndarray) and xl.shape == (aub.shape[1],)
assert isinstance(xu, np.ndarray) and xu.shape == (aub.shape[1],)
assert isinstance(delta, float)
assert isinstance(debug, bool)
tol = get_arrays_tol(xl, xu)
assert np.all(xl <= tol)
assert np.all(xu >= -tol)
assert np.isfinite(delta) and delta > 0.0
xl = np.minimum(xl, 0.0)
xu = np.maximum(xu, 0.0)
# Calculate the initial active set.
m_linear_ub, n = aub.shape
grad = np.r_[aeq.T @ -beq, np.maximum(0.0, -bub)]
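    # The problem is handled in the composite variables (step, slack), where
    # the slack variables relax the linear inequality constraints. The first n
    # entries of grad hold the gradient with respect to step, and, since the
    # gradient of the least-squares objective with respect to the slack
    # variables equals the slack variables themselves, grad[n:] in effect also
    # stores the current slack vector (initialized to np.maximum(0.0, -bub),
    # the smallest feasible slack at the origin).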
free_xl = (xl < 0.0) | (grad[:n] < 0.0)
free_xu = (xu > 0.0) | (grad[:n] > 0.0)
free_slack = bub < 0.0
free_ub = (bub > 0.0) | (aub @ grad[:n] - grad[n:] > 0.0)
n_act, q = qr_normal_byrd_omojokun(
aub,
free_xl,
free_xu,
free_slack,
free_ub,
)
# Calculate an upper bound on the norm of the slack variables. It is not
# used in the original algorithm, but it may prevent undesired behaviors
# engendered by computer rounding errors.
delta_slack = np.sqrt(beq @ beq + grad[n:] @ grad[n:])
# Set the initial iterate and the initial search direction.
step = np.zeros(n)
sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
resid = bub + grad[n:]
k = 0
reduct = 0.0
boundary_reached = False
while k < n + m_linear_ub - n_act:
# Stop the computations if sd is not a descent direction.
grad_sd = grad @ sd
if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
break
# Set alpha_tr to the step size for the trust-region constraint.
try:
alpha_tr = _alpha_tr(step, sd[:n], delta)
except ZeroDivisionError:
alpha_tr = np.inf
# Prevent undesired behaviors engendered by computer rounding errors by
# considering the trust-region constraint on the slack variables.
try:
alpha_tr = min(alpha_tr, _alpha_tr(grad[n:], sd[n:], delta_slack))
except ZeroDivisionError:
pass
# Stop the computations if a step along sd is expected to give a
# relatively small reduction in the objective function.
if -alpha_tr * grad_sd <= 1e-8 * reduct:
break
# Set alpha_quad to the step size for the minimization problem.
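        # In the composite (step, slack) variables, the least-squares
        # objective has the constant Hessian blkdiag(aeq.T @ aeq, identity),
        # which is applied to sd below.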
hess_sd = np.r_[aeq.T @ (aeq @ sd[:n]), sd[n:]]
curv_sd = sd @ hess_sd
if curv_sd > TINY * abs(grad_sd):
alpha_quad = max(-grad_sd / curv_sd, 0.0)
else:
alpha_quad = np.inf
# Stop the computations if the reduction in the objective function
# provided by an unconstrained step is small.
alpha = min(alpha_tr, alpha_quad)
if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
break
# Set alpha_bd to the step size for the bound constraints.
i_xl = free_xl & (xl > -np.inf) & (sd[:n] < -TINY * np.abs(xl - step))
i_xu = free_xu & (xu < np.inf) & (sd[:n] > TINY * np.abs(xu - step))
i_slack = free_slack & (sd[n:] < -TINY * np.abs(grad[n:]))
all_alpha_xl = np.full_like(step, np.inf)
all_alpha_xu = np.full_like(step, np.inf)
all_alpha_slack = np.full_like(bub, np.inf)
all_alpha_xl[i_xl] = np.maximum(
(xl[i_xl] - step[i_xl]) / sd[:n][i_xl],
0.0,
)
all_alpha_xu[i_xu] = np.maximum(
(xu[i_xu] - step[i_xu]) / sd[:n][i_xu],
0.0,
)
all_alpha_slack[i_slack] = np.maximum(
-grad[n:][i_slack] / sd[n:][i_slack],
0.0,
)
alpha_xl = np.min(all_alpha_xl)
alpha_xu = np.min(all_alpha_xu)
alpha_slack = np.min(all_alpha_slack, initial=np.inf)
alpha_bd = min(alpha_xl, alpha_xu, alpha_slack)
# Set alpha_ub to the step size for the linear constraints.
aub_sd = aub @ sd[:n] - sd[n:]
i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
all_alpha_ub = np.full_like(bub, np.inf)
all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
alpha_ub = np.min(all_alpha_ub, initial=np.inf)
# Update the iterate.
alpha = min(alpha, alpha_bd, alpha_ub)
if alpha > 0.0:
step = np.clip(step + alpha * sd[:n], xl, xu)
grad += alpha * hess_sd
resid = np.maximum(0.0, resid - alpha * aub_sd)
reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
if alpha < min(alpha_tr, alpha_bd, alpha_ub):
# The current iteration is a conjugate gradient iteration. Update
# the search direction so that it is conjugate (with respect to H)
# to all the previous search directions.
grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
beta = (grad_proj @ hess_sd) / curv_sd
sd = beta * sd - grad_proj
k += 1
elif alpha < alpha_tr:
# The iterate is restricted by a bound/linear constraint. Add this
# constraint to the active set, and restart the calculations.
if alpha_xl <= alpha:
i_new = np.argmin(all_alpha_xl)
step[i_new] = xl[i_new]
free_xl[i_new] = False
elif alpha_xu <= alpha:
i_new = np.argmin(all_alpha_xu)
step[i_new] = xu[i_new]
free_xu[i_new] = False
elif alpha_slack <= alpha:
i_new = np.argmin(all_alpha_slack)
free_slack[i_new] = False
else:
i_new = np.argmin(all_alpha_ub)
free_ub[i_new] = False
n_act, q = qr_normal_byrd_omojokun(
aub, free_xl, free_xu, free_slack, free_ub
)
sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
k = 0
else:
# The current iterate is on the trust-region boundary. Add all the
# active bound constraints to the working set to prepare for the
# improvement of the solution, and stop the iterations.
if alpha_xl <= alpha:
i_new = _argmin(all_alpha_xl)
step[i_new] = xl[i_new]
free_xl[i_new] = False
if alpha_xu <= alpha:
i_new = _argmin(all_alpha_xu)
step[i_new] = xu[i_new]
free_xu[i_new] = False
boundary_reached = True
break
# Attempt to improve the solution on the trust-region boundary.
if kwargs.get("improve_tcg", True) and boundary_reached:
step_base = np.copy(step)
free_bd = free_xl & free_xu
grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
aeq @ step - beq
)
sd = np.zeros(n)
while np.count_nonzero(free_bd) > 0:
# Check whether a substantial reduction in the objective function
# is possible, and set the search direction.
step_sq = step[free_bd] @ step[free_bd]
grad_sq = grad[free_bd] @ grad[free_bd]
grad_step = grad[free_bd] @ step[free_bd]
grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
sd[~free_bd] = 0.0
if grad_sd >= -1e-8 * reduct or np.any(
grad_sd >= -TINY * np.abs(sd[free_bd])
):
break
sd[free_bd] /= -grad_sd
# Calculate an upper bound for the tangent of half the angle theta
# of this alternative iteration. The step will be updated as:
# step = cos(theta) * step + sin(theta) * sd.
temp_xl = np.zeros(n)
temp_xu = np.zeros(n)
temp_xl[free_bd] = (
step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
)
temp_xu[free_bd] = (
step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
)
temp_xl[temp_xl > 0.0] = (
np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
)
temp_xu[temp_xu > 0.0] = (
np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
)
dist_xl = np.maximum(step - xl, 0.0)
dist_xu = np.maximum(xu - step, 0.0)
i_xl = temp_xl > TINY * dist_xl
i_xu = temp_xu > TINY * dist_xu
all_t_xl = np.ones(n)
all_t_xu = np.ones(n)
all_t_xl[i_xl] = np.minimum(
all_t_xl[i_xl],
dist_xl[i_xl] / temp_xl[i_xl],
)
all_t_xu[i_xu] = np.minimum(
all_t_xu[i_xu],
dist_xu[i_xu] / temp_xu[i_xu],
)
t_xl = np.min(all_t_xl)
t_xu = np.min(all_t_xu)
t_bd = min(t_xl, t_xu)
# For a range of equally spaced values of tan(0.5 * theta),
# calculate the reduction in the objective function that would be
# obtained by accepting the corresponding angle.
n_samples = 20
n_samples = int((n_samples - 3) * t_bd + 3)
t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
resid_ub = np.maximum(aub @ step - bub, 0.0)
resid_eq = aeq @ step - beq
step_proj = np.copy(step)
step_proj[~free_bd] = 0.0
all_reduct = np.empty(n_samples)
for i in range(n_samples):
sin_value = 2.0 * t_samples[i] / (1.0 + t_samples[i] ** 2.0)
step_alt = np.clip(
step + sin_value * (sd - t_samples[i] * step_proj),
xl,
xu,
)
resid_ub_alt = np.maximum(aub @ step_alt - bub, 0.0)
resid_eq_alt = aeq @ step_alt - beq
all_reduct[i] = 0.5 * (
resid_ub @ resid_ub
+ resid_eq @ resid_eq
- resid_ub_alt @ resid_ub_alt
- resid_eq_alt @ resid_eq_alt
)
if np.all(all_reduct <= 0.0):
# No reduction in the objective function is obtained.
break
# Accept the angle that provides the largest reduction in the
# objective function, and update the iterate.
i_max = np.argmax(all_reduct)
cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
1.0 + t_samples[i_max] ** 2.0
)
            sin_value = 2.0 * t_samples[i_max] / (
                1.0 + t_samples[i_max] ** 2.0
            )
step[free_bd] = cos_value * step[free_bd] + sin_value * sd[free_bd]
grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
aeq @ step - beq
)
reduct += all_reduct[i_max]
# If the above angle is restricted by bound constraints, add them
# to the working set, and restart the alternative iteration.
# Otherwise, the calculations are terminated.
if t_bd < 1.0 and i_max == n_samples - 1:
if t_xl <= t_bd:
i_new = _argmin(all_t_xl)
step[i_new] = xl[i_new]
free_bd[i_new] = False
if t_xu <= t_bd:
i_new = _argmin(all_t_xu)
step[i_new] = xu[i_new]
free_bd[i_new] = False
else:
break
# Ensure that the alternative iteration improves the objective
# function.
resid_ub = np.maximum(aub @ step - bub, 0.0)
resid_ub_base = np.maximum(aub @ step_base - bub, 0.0)
resid_eq = aeq @ step - beq
resid_eq_base = aeq @ step_base - beq
if (
resid_ub @ resid_ub + resid_eq @ resid_eq
> resid_ub_base @ resid_ub_base + resid_eq_base @ resid_eq_base
):
step = step_base
if debug:
assert np.all(xl <= step)
assert np.all(step <= xu)
assert np.linalg.norm(step) < 1.1 * delta
return step
def qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub):
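    # QR factorization with column pivoting of the transpose of the matrix
    # whose rows are the gradients of the working-set constraints, namely the
    # linear equality constraints and the currently active linear inequality
    # and bound constraints. The value n_act counts the diagonal entries of R
    # that are not negligible relative to their columns, i.e., the number of
    # numerically independent constraint gradients, and the trailing columns
    # q[:, n_act:] then span the null space of the working set.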
n = free_xl.size
identity = np.eye(n)
q, r, _ = qr(
np.block(
[
[aeq],
[aub[~free_ub, :]],
[-identity[~free_xl, :]],
[identity[~free_xu, :]],
]
).T,
pivoting=True,
)
n_act = np.count_nonzero(
np.abs(np.diag(r))
>= 10.0
* EPS
* n
* np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
)
return n_act, q
def qr_normal_byrd_omojokun(aub, free_xl, free_xu, free_slack, free_ub):
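    # Same construction as qr_tangential_byrd_omojokun, but in the composite
    # (step, slack) space of dimension n + m_linear_ub used by
    # normal_byrd_omojokun: the rows stack the active linear inequality
    # constraints (with their slack columns), the active nonnegativity
    # constraints on the slack variables, and the active bound constraints.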
m_linear_ub, n = aub.shape
identity_n = np.eye(n)
identity_m = np.eye(m_linear_ub)
q, r, _ = qr(
np.block(
[
[
aub[~free_ub, :],
-identity_m[~free_ub, :],
],
[
np.zeros((m_linear_ub - np.count_nonzero(free_slack), n)),
-identity_m[~free_slack, :],
],
[
-identity_n[~free_xl, :],
np.zeros((n - np.count_nonzero(free_xl), m_linear_ub)),
],
[
identity_n[~free_xu, :],
np.zeros((n - np.count_nonzero(free_xu), m_linear_ub)),
],
]
).T,
pivoting=True,
)
n_act = np.count_nonzero(
np.abs(np.diag(r))
>= 10.0
* EPS
* (n + m_linear_ub)
* np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
)
return n_act, q
def _alpha_tr(step, sd, delta):
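    # Return the largest alpha >= 0 such that ||step + alpha * sd|| <= delta,
    # i.e., the nonnegative root of a quadratic equation in alpha. The two
    # branches below are algebraically equivalent forms of that root; the one
    # that avoids subtractive cancellation is selected, and ZeroDivisionError
    # is raised when neither denominator is safely nonzero.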
step_sd = step @ sd
sd_sq = sd @ sd
dist_tr_sq = delta**2.0 - step @ step
temp = np.sqrt(max(step_sd**2.0 + sd_sq * dist_tr_sq, 0.0))
if step_sd <= 0.0 and sd_sq > TINY * abs(temp - step_sd):
alpha_tr = max((temp - step_sd) / sd_sq, 0.0)
elif abs(temp + step_sd) > TINY * dist_tr_sq:
alpha_tr = max(dist_tr_sq / (temp + step_sd), 0.0)
else:
raise ZeroDivisionError
return alpha_tr
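

# Unlike np.argmax and np.argmin, the helpers below return all the indices at
# which the extremum is attained, so that tied constraints are added to the
# working set together.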
def _argmax(x):
return np.flatnonzero(x >= np.max(x))
def _argmin(x):
return np.flatnonzero(x <= np.min(x))
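

# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the original module: it exercises
# tangential_byrd_omojokun on a small, arbitrary strictly convex quadratic
# with unit bounds and a unit trust-region radius. Because this file relies
# on a relative import, the sketch only runs when the module is executed from
# within its package (for example via
# ``python -m scipy._lib.cobyqa.subsolvers.optim``).
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n_demo = 4
    # Diagonal, positive definite Hessian and a random gradient.
    hess_demo = np.diag(np.arange(1.0, n_demo + 1.0))
    grad_demo = rng.standard_normal(n_demo)
    step_demo = tangential_byrd_omojokun(
        grad_demo,
        lambda s: hess_demo @ s,  # Hessian-vector product
        -np.ones(n_demo),  # lower bounds (the origin must be feasible)
        np.ones(n_demo),  # upper bounds
        1.0,  # trust-region radius
        True,  # enable the debugging checks
    )
    print("Approximate tangential step:", step_demo)
    print("Norm of the step:", np.linalg.norm(step_demo))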