import inspect

import numpy as np
from scipy.linalg import qr

from ..utils import get_arrays_tol


# Smallest positive normal float and machine epsilon, used below to guard
# divisions and to set numerical tolerances.
TINY = np.finfo(float).tiny
EPS = np.finfo(float).eps


def tangential_byrd_omojokun(grad, hess_prod, xl, xu, delta, debug, **kwargs):
    r"""
    Minimize approximately a quadratic function subject to bound constraints
    in a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
        s^{\mathsf{T}} H s \quad \text{s.t.} \quad
        \left\{ \begin{array}{l}
            l \le s \le u,\\
            \lVert s \rVert \le \Delta,
        \end{array} \right.

    using an active-set variation of the truncated conjugate gradient method.

    Parameters
    ----------
    grad : `numpy.ndarray`, shape (n,)
        Gradient :math:`g` as shown above.
    hess_prod : callable
        Product of the Hessian matrix :math:`H` with any vector.

            ``hess_prod(s) -> `numpy.ndarray`, shape (n,)``

        returns the product :math:`H s`.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to perform debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.2 of [1]_. It is assumed that the
    origin is feasible with respect to the bound constraints and that `delta`
    is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
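
    Examples
    --------
    A minimal sketch of a call, on data invented for illustration (a strictly
    convex two-dimensional quadratic whose Hessian is the identity matrix):

    >>> import numpy as np
    >>> grad = np.array([1.0, -1.0])
    >>> def hess_prod(s):
    ...     return np.copy(s)  # the Hessian is the identity matrix
    >>> xl = np.array([-2.0, -2.0])
    >>> xu = np.array([2.0, 2.0])
    >>> step = tangential_byrd_omojokun(grad, hess_prod, xl, xu, 1.0, False)

    The bounds are inactive here, so the returned step is close to the
    projection of the unconstrained minimizer :math:`-g` onto the trust
    region, namely :math:`(-\sqrt{2}/2, \sqrt{2}/2)`.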
    """
    if debug:
        assert isinstance(grad, np.ndarray) and grad.ndim == 1
        assert inspect.signature(hess_prod).bind(grad)
        assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
        assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
        assert isinstance(delta, float)
        assert isinstance(debug, bool)
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= tol)
        assert np.all(xu >= -tol)
        assert np.isfinite(delta) and delta > 0.0
    xl = np.minimum(xl, 0.0)
    xu = np.maximum(xu, 0.0)

    # Copy the arrays that may be modified by the code below.
    n = grad.size
    grad = np.copy(grad)
    grad_orig = np.copy(grad)

    # Calculate the initial active set.
    free_bd = ((xl < 0.0) | (grad < 0.0)) & ((xu > 0.0) | (grad > 0.0))

    # Set the initial iterate and the initial search direction.
    step = np.zeros_like(grad)
    sd = np.zeros_like(step)
    sd[free_bd] = -grad[free_bd]

    k = 0
    reduct = 0.0
    boundary_reached = False
    while k < np.count_nonzero(free_bd):
        # Stop the computations if sd is not a descent direction.
        grad_sd = grad @ sd
        if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
            break

        # Set alpha_tr to the step size for the trust-region constraint.
        try:
            alpha_tr = _alpha_tr(step, sd, delta)
        except ZeroDivisionError:
            break

        # Stop the computations if a step along sd is expected to give a
        # relatively small reduction in the objective function.
        if -alpha_tr * grad_sd <= 1e-8 * reduct:
            break

        # Set alpha_quad to the step size for the minimization problem.
        hess_sd = hess_prod(sd)
        curv_sd = sd @ hess_sd
        if curv_sd > TINY * abs(grad_sd):
            alpha_quad = max(-grad_sd / curv_sd, 0.0)
        else:
            alpha_quad = np.inf

        # Stop the computations if the reduction in the objective function
        # provided by an unconstrained step is small.
        alpha = min(alpha_tr, alpha_quad)
        if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
            break

        # Set alpha_bd to the step size for the bound constraints.
        i_xl = (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
        i_xu = (xu < np.inf) & (sd > TINY * np.abs(xu - step))
        all_alpha_xl = np.full_like(step, np.inf)
        all_alpha_xu = np.full_like(step, np.inf)
        all_alpha_xl[i_xl] = np.maximum(
            (xl[i_xl] - step[i_xl]) / sd[i_xl],
            0.0,
        )
        all_alpha_xu[i_xu] = np.maximum(
            (xu[i_xu] - step[i_xu]) / sd[i_xu],
            0.0,
        )
        alpha_xl = np.min(all_alpha_xl)
        alpha_xu = np.min(all_alpha_xu)
        alpha_bd = min(alpha_xl, alpha_xu)

        # Update the iterate.
        alpha = min(alpha, alpha_bd)
        if alpha > 0.0:
            step[free_bd] = np.clip(
                step[free_bd] + alpha * sd[free_bd],
                xl[free_bd],
                xu[free_bd],
            )
            grad += alpha * hess_sd
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)

        if alpha < min(alpha_tr, alpha_bd):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            beta = (grad[free_bd] @ hess_sd[free_bd]) / curv_sd
            sd[free_bd] = beta * sd[free_bd] - grad[free_bd]
            sd[~free_bd] = 0.0
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound constraint. Add this bound
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
            else:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
            free_bd[i_new] = False
            sd[free_bd] = -grad[free_bd]
            sd[~free_bd] = 0.0
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bounds to the working set to prepare for the improvement
            # of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_bd[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_bd[i_new] = False
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached:
        step_base = np.copy(step)
        step_comparator = grad_orig @ step_base + 0.5 * step_base @ hess_prod(
            step_base
        )

        while np.count_nonzero(free_bd) > 0:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
            step_sq = step[free_bd] @ step[free_bd]
            grad_sq = grad[free_bd] @ grad[free_bd]
            grad_step = grad[free_bd] @ step[free_bd]
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
            sd[~free_bd] = 0.0
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd[free_bd])
            ):
                break
            sd[free_bd] /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration. The step will be updated as:
            # step = cos(theta) * step + sin(theta) * sd.
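            # With t = tan(0.5 * theta), we have sin(theta) = 2t / (1 + t**2)
            # and cos(theta) = (1 - t**2) / (1 + t**2), so that t in [0, 1]
            # covers theta in [0, pi / 2]. Since sd is orthogonal to step and
            # has the same norm on the free variables, such an update moves
            # the step around the trust-region boundary.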
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            temp_xl[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
            )
            temp_xu[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # Calculate some curvature information.
            hess_step = hess_prod(step)
            hess_sd = hess_prod(sd)
            curv_step = step @ hess_step
            curv_sd = sd @ hess_sd
            curv_step_sd = step @ hess_sd

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_bd + 3)
            t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
            sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
            all_reduct = sin_values * (
                grad_step * t_samples
                - grad_sd
                - t_samples * curv_step
                + sin_values
                * (t_samples * curv_step_sd - 0.5 * (curv_sd - curv_step))
            )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step[free_bd] = (
                cos_value * step[free_bd] + sin_values[i_max] * sd[free_bd]
            )
            grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound constraints, add them
            # to the working set, and restart the alternative iteration.
            # Otherwise, the calculations are terminated.
            if t_bd < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_bd:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_bd[i_new] = False
                if t_xu <= t_bd:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_bd[i_new] = False
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
        if grad_orig @ step + 0.5 * step @ hess_prod(step) > step_comparator:
            step = step_base

    if debug:
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def constrained_tangential_byrd_omojokun(
    grad,
    hess_prod,
    xl,
    xu,
    aub,
    bub,
    aeq,
    delta,
    debug,
    **kwargs,
):
    r"""
    Minimize approximately a quadratic function subject to bound and linear
    constraints in a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
        s^{\mathsf{T}} H s \quad \text{s.t.} \quad
        \left\{ \begin{array}{l}
            l \le s \le u,\\
            A_{\scriptscriptstyle I} s \le b_{\scriptscriptstyle I},\\
            A_{\scriptscriptstyle E} s = 0,\\
            \lVert s \rVert \le \Delta,
        \end{array} \right.

    using an active-set variation of the truncated conjugate gradient method.

    Parameters
    ----------
    grad : `numpy.ndarray`, shape (n,)
        Gradient :math:`g` as shown above.
    hess_prod : callable
        Product of the Hessian matrix :math:`H` with any vector.

            ``hess_prod(s) -> `numpy.ndarray`, shape (n,)``

        returns the product :math:`H s`.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    aub : `numpy.ndarray`, shape (m_linear_ub, n)
        Coefficient matrix :math:`A_{\scriptscriptstyle I}` as shown above.
    bub : `numpy.ndarray`, shape (m_linear_ub,)
        Right-hand side :math:`b_{\scriptscriptstyle I}` as shown above.
    aeq : `numpy.ndarray`, shape (m_linear_eq, n)
        Coefficient matrix :math:`A_{\scriptscriptstyle E}` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to perform debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.3 of [1]_. It is assumed that the
    origin is feasible with respect to the bound and linear constraints, and
    that `delta` is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
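
    Examples
    --------
    A minimal sketch of a call, on data invented for illustration: the same
    quadratic as in the example of `tangential_byrd_omojokun`, with one
    linear inequality and one linear equality constraint:

    >>> import numpy as np
    >>> grad = np.array([1.0, -1.0])
    >>> def hess_prod(s):
    ...     return np.copy(s)  # the Hessian is the identity matrix
    >>> xl = np.array([-2.0, -2.0])
    >>> xu = np.array([2.0, 2.0])
    >>> aub = np.array([[1.0, 0.0]])
    >>> bub = np.array([0.5])
    >>> aeq = np.array([[1.0, 1.0]])
    >>> step = constrained_tangential_byrd_omojokun(
    ...     grad, hess_prod, xl, xu, aub, bub, aeq, 1.0, False
    ... )

    The returned step approximately minimizes the quadratic within the trust
    region along the subspace :math:`s_1 + s_2 = 0`, without violating the
    inequality :math:`s_1 \le 1/2`.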
    """
    if debug:
        assert isinstance(grad, np.ndarray) and grad.ndim == 1
        assert inspect.signature(hess_prod).bind(grad)
        assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
        assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
        assert (
            isinstance(aub, np.ndarray)
            and aub.ndim == 2
            and aub.shape[1] == grad.size
        )
        assert (
            isinstance(bub, np.ndarray)
            and bub.ndim == 1
            and bub.size == aub.shape[0]
        )
        assert (
            isinstance(aeq, np.ndarray)
            and aeq.ndim == 2
            and aeq.shape[1] == grad.size
        )
        assert isinstance(delta, float)
        assert isinstance(debug, bool)
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= tol)
        assert np.all(xu >= -tol)
        assert np.all(bub >= -tol)
        assert np.isfinite(delta) and delta > 0.0
    xl = np.minimum(xl, 0.0)
    xu = np.maximum(xu, 0.0)
    bub = np.maximum(bub, 0.0)

    # Copy the arrays that may be modified by the code below.
    n = grad.size
    grad = np.copy(grad)
    grad_orig = np.copy(grad)

    # Calculate the initial active set.
    free_xl = (xl < 0.0) | (grad < 0.0)
    free_xu = (xu > 0.0) | (grad > 0.0)
    free_ub = (bub > 0.0) | (aub @ grad > 0.0)
    n_act, q = qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub)

    # Set the initial iterate and the initial search direction.
    step = np.zeros_like(grad)
    sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
    resid = np.copy(bub)

    k = 0
    reduct = 0.0
    boundary_reached = False
    while k < n - n_act:
        # Stop the computations if sd is not a descent direction.
        grad_sd = grad @ sd
        if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
            break

        # Set alpha_tr to the step size for the trust-region constraint.
        try:
            alpha_tr = _alpha_tr(step, sd, delta)
        except ZeroDivisionError:
            break

        # Stop the computations if a step along sd is expected to give a
        # relatively small reduction in the objective function.
        if -alpha_tr * grad_sd <= 1e-8 * reduct:
            break

        # Set alpha_quad to the step size for the minimization problem.
        hess_sd = hess_prod(sd)
        curv_sd = sd @ hess_sd
        if curv_sd > TINY * abs(grad_sd):
            alpha_quad = max(-grad_sd / curv_sd, 0.0)
        else:
            alpha_quad = np.inf

        # Stop the computations if the reduction in the objective function
        # provided by an unconstrained step is small.
        alpha = min(alpha_tr, alpha_quad)
        if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
            break

        # Set alpha_bd to the step size for the bound constraints.
        i_xl = free_xl & (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
        i_xu = free_xu & (xu < np.inf) & (sd > TINY * np.abs(xu - step))
        all_alpha_xl = np.full_like(step, np.inf)
        all_alpha_xu = np.full_like(step, np.inf)
        all_alpha_xl[i_xl] = np.maximum(
            (xl[i_xl] - step[i_xl]) / sd[i_xl],
            0.0,
        )
        all_alpha_xu[i_xu] = np.maximum(
            (xu[i_xu] - step[i_xu]) / sd[i_xu],
            0.0,
        )
        alpha_xl = np.min(all_alpha_xl)
        alpha_xu = np.min(all_alpha_xu)
        alpha_bd = min(alpha_xl, alpha_xu)

        # Set alpha_ub to the step size for the linear constraints.
        aub_sd = aub @ sd
        i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
        all_alpha_ub = np.full_like(bub, np.inf)
        all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
        alpha_ub = np.min(all_alpha_ub, initial=np.inf)

        # Update the iterate.
        alpha = min(alpha, alpha_bd, alpha_ub)
        if alpha > 0.0:
            step = np.clip(step + alpha * sd, xl, xu)
            grad += alpha * hess_sd
            resid = np.maximum(0.0, resid - alpha * aub_sd)
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)

        if alpha < min(alpha_tr, alpha_bd, alpha_ub):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            beta = (grad_proj @ hess_sd) / curv_sd
            sd = beta * sd - grad_proj
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound/linear constraint. Add this
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            elif alpha_xu <= alpha:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            else:
                i_new = np.argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_tangential_byrd_omojokun(
                aub,
                aeq,
                free_xl,
                free_xu,
                free_ub,
            )
            sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bound/linear constraints to the working set to prepare
            # for the improvement of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            if alpha_ub <= alpha:
                i_new = _argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_tangential_byrd_omojokun(
                aub,
                aeq,
                free_xl,
                free_xu,
                free_ub,
            )
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached and n_act < n:
        step_base = np.copy(step)
        while n_act < n:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
            step_proj = q[:, n_act:] @ (q[:, n_act:].T @ step)
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            step_sq = step_proj @ step_proj
            grad_sq = grad_proj @ grad_proj
            grad_step = grad_proj @ step_proj
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd = q[:, n_act:] @ (
                q[:, n_act:].T @ (grad_step * step - step_sq * grad)
            )
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd)
            ):
                break
            sd /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration for the bound constraints. The
            # step will be updated as:
            # step += (cos(theta) - 1) * step_proj + sin(theta) * sd.
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            temp_xl[free_xl] = sd[free_xl] ** 2.0 - dist_xl[free_xl] * (
                dist_xl[free_xl] - 2.0 * step_proj[free_xl]
            )
            temp_xu[free_xu] = sd[free_xu] ** 2.0 - dist_xu[free_xu] * (
                dist_xu[free_xu] + 2.0 * step_proj[free_xu]
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration for the linear constraints.
            temp_ub = np.zeros_like(resid)
            aub_step = aub @ step_proj
            aub_sd = aub @ sd
            temp_ub[free_ub] = aub_sd[free_ub] ** 2.0 - resid[free_ub] * (
                resid[free_ub] + 2.0 * aub_step[free_ub]
            )
            temp_ub[temp_ub > 0.0] = (
                np.sqrt(temp_ub[temp_ub > 0.0]) + aub_sd[temp_ub > 0.0]
            )
            i_ub = temp_ub > TINY * resid
            all_t_ub = np.ones_like(resid)
            all_t_ub[i_ub] = np.minimum(
                all_t_ub[i_ub],
                resid[i_ub] / temp_ub[i_ub],
            )
            t_ub = np.min(all_t_ub, initial=1.0)
            t_min = min(t_bd, t_ub)

            # Calculate some curvature information.
            hess_step = hess_prod(step_proj)
            hess_sd = hess_prod(sd)
            curv_step = step_proj @ hess_step
            curv_sd = sd @ hess_sd
            curv_step_sd = step_proj @ hess_sd

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_min + 3)
            t_samples = np.linspace(t_min / n_samples, t_min, n_samples)
            sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
            all_reduct = sin_values * (
                grad_step * t_samples
                - grad_sd
                - sin_values
                * (
                    0.5 * t_samples**2.0 * curv_step
                    - t_samples * curv_step_sd
                    + 0.5 * curv_sd
                )
            )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step = np.clip(
                step + (cos_value - 1.0) * step_proj + sin_values[i_max] * sd,
                xl,
                xu,
            )
            grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
            resid = np.maximum(
                0.0,
                resid
                - (cos_value - 1.0) * aub_step
                - sin_values[i_max] * aub_sd,
            )
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound constraints, add them
            # to the working set, and restart the alternative iteration.
            # Otherwise, the calculations are terminated.
            if t_min < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_min:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_xl[i_new] = False
                if t_xu <= t_min:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_xu[i_new] = False
                if t_ub <= t_min:
                    i_new = _argmin(all_t_ub)
                    free_ub[i_new] = False
                n_act, q = qr_tangential_byrd_omojokun(
                    aub,
                    aeq,
                    free_xl,
                    free_xu,
                    free_ub,
                )
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
        if grad_orig @ step + 0.5 * step @ hess_prod(
            step
        ) > grad_orig @ step_base + 0.5 * step_base @ hess_prod(step_base):
            step = step_base

    if debug:
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.all(aub @ step <= bub + tol)
        assert np.all(np.abs(aeq @ step) <= tol)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def normal_byrd_omojokun(aub, bub, aeq, beq, xl, xu, delta, debug, **kwargs):
    r"""
    Minimize approximately a linear constraint violation subject to bound
    constraints in a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad \frac{1}{2} \big( \lVert \max \{
        A_{\scriptscriptstyle I} s - b_{\scriptscriptstyle I}, 0 \} \rVert^2 +
        \lVert A_{\scriptscriptstyle E} s - b_{\scriptscriptstyle E} \rVert^2
        \big) \quad \text{s.t.} \quad
        \left\{ \begin{array}{l}
            l \le s \le u,\\
            \lVert s \rVert \le \Delta,
        \end{array} \right.

    using a variation of the truncated conjugate gradient method.

    Parameters
    ----------
    aub : `numpy.ndarray`, shape (m_linear_ub, n)
        Matrix :math:`A_{\scriptscriptstyle I}` as shown above.
    bub : `numpy.ndarray`, shape (m_linear_ub,)
        Vector :math:`b_{\scriptscriptstyle I}` as shown above.
    aeq : `numpy.ndarray`, shape (m_linear_eq, n)
        Matrix :math:`A_{\scriptscriptstyle E}` as shown above.
    beq : `numpy.ndarray`, shape (m_linear_eq,)
        Vector :math:`b_{\scriptscriptstyle E}` as shown above.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to perform debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.4 of [1]_. It is assumed that the
    origin is feasible with respect to the bound constraints and that `delta`
    is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
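
    Examples
    --------
    A minimal sketch of a call, on data invented for illustration: the origin
    violates both the inequality :math:`s_1 \le -1/2` and the equality
    :math:`s_2 = 1`, and the returned step reduces the total squared
    violation:

    >>> import numpy as np
    >>> aub = np.array([[1.0, 0.0]])
    >>> bub = np.array([-0.5])
    >>> aeq = np.array([[0.0, 1.0]])
    >>> beq = np.array([1.0])
    >>> xl = np.array([-2.0, -2.0])
    >>> xu = np.array([2.0, 2.0])
    >>> step = normal_byrd_omojokun(aub, bub, aeq, beq, xl, xu, 2.0, False)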
    """
    if debug:
        assert isinstance(aub, np.ndarray) and aub.ndim == 2
        assert (
            isinstance(bub, np.ndarray)
            and bub.ndim == 1
            and bub.size == aub.shape[0]
        )
        assert (
            isinstance(aeq, np.ndarray)
            and aeq.ndim == 2
            and aeq.shape[1] == aub.shape[1]
        )
        assert (
            isinstance(beq, np.ndarray)
            and beq.ndim == 1
            and beq.size == aeq.shape[0]
        )
        assert isinstance(xl, np.ndarray) and xl.shape == (aub.shape[1],)
        assert isinstance(xu, np.ndarray) and xu.shape == (aub.shape[1],)
        assert isinstance(delta, float)
        assert isinstance(debug, bool)
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= tol)
        assert np.all(xu >= -tol)
        assert np.isfinite(delta) and delta > 0.0
    xl = np.minimum(xl, 0.0)
    xu = np.maximum(xu, 0.0)

    # Calculate the initial active set.
    m_linear_ub, n = aub.shape
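    # The violation is handled through slack variables: the first n entries
    # of grad below correspond to s, and its last m_linear_ub entries hold
    # the values of the slack variables attached to the linear inequality
    # constraints.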
    grad = np.r_[aeq.T @ -beq, np.maximum(0.0, -bub)]
    free_xl = (xl < 0.0) | (grad[:n] < 0.0)
    free_xu = (xu > 0.0) | (grad[:n] > 0.0)
    free_slack = bub < 0.0
    free_ub = (bub > 0.0) | (aub @ grad[:n] - grad[n:] > 0.0)
    n_act, q = qr_normal_byrd_omojokun(
        aub,
        free_xl,
        free_xu,
        free_slack,
        free_ub,
    )

    # Calculate an upper bound on the norm of the slack variables. It is not
    # used in the original algorithm, but it may prevent undesired behaviors
    # engendered by computer rounding errors.
    delta_slack = np.sqrt(beq @ beq + grad[n:] @ grad[n:])

    # Set the initial iterate and the initial search direction.
    step = np.zeros(n)
    sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
    resid = bub + grad[n:]

    k = 0
    reduct = 0.0
    boundary_reached = False
    while k < n + m_linear_ub - n_act:
        # Stop the computations if sd is not a descent direction.
        grad_sd = grad @ sd
        if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
            break

        # Set alpha_tr to the step size for the trust-region constraint.
        try:
            alpha_tr = _alpha_tr(step, sd[:n], delta)
        except ZeroDivisionError:
            alpha_tr = np.inf

        # Prevent undesired behaviors engendered by computer rounding errors
        # by considering the trust-region constraint on the slack variables.
        try:
            alpha_tr = min(alpha_tr, _alpha_tr(grad[n:], sd[n:], delta_slack))
        except ZeroDivisionError:
            pass

        # Stop the computations if a step along sd is expected to give a
        # relatively small reduction in the objective function.
        if -alpha_tr * grad_sd <= 1e-8 * reduct:
            break

        # Set alpha_quad to the step size for the minimization problem.
        hess_sd = np.r_[aeq.T @ (aeq @ sd[:n]), sd[n:]]
        curv_sd = sd @ hess_sd
        if curv_sd > TINY * abs(grad_sd):
            alpha_quad = max(-grad_sd / curv_sd, 0.0)
        else:
            alpha_quad = np.inf

        # Stop the computations if the reduction in the objective function
        # provided by an unconstrained step is small.
        alpha = min(alpha_tr, alpha_quad)
        if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
            break

        # Set alpha_bd to the step size for the bound constraints.
        i_xl = free_xl & (xl > -np.inf) & (sd[:n] < -TINY * np.abs(xl - step))
        i_xu = free_xu & (xu < np.inf) & (sd[:n] > TINY * np.abs(xu - step))
        i_slack = free_slack & (sd[n:] < -TINY * np.abs(grad[n:]))
        all_alpha_xl = np.full_like(step, np.inf)
        all_alpha_xu = np.full_like(step, np.inf)
        all_alpha_slack = np.full_like(bub, np.inf)
        all_alpha_xl[i_xl] = np.maximum(
            (xl[i_xl] - step[i_xl]) / sd[:n][i_xl],
            0.0,
        )
        all_alpha_xu[i_xu] = np.maximum(
            (xu[i_xu] - step[i_xu]) / sd[:n][i_xu],
            0.0,
        )
        all_alpha_slack[i_slack] = np.maximum(
            -grad[n:][i_slack] / sd[n:][i_slack],
            0.0,
        )
        alpha_xl = np.min(all_alpha_xl)
        alpha_xu = np.min(all_alpha_xu)
        alpha_slack = np.min(all_alpha_slack, initial=np.inf)
        alpha_bd = min(alpha_xl, alpha_xu, alpha_slack)

        # Set alpha_ub to the step size for the linear constraints.
        aub_sd = aub @ sd[:n] - sd[n:]
        i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
        all_alpha_ub = np.full_like(bub, np.inf)
        all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
        alpha_ub = np.min(all_alpha_ub, initial=np.inf)

        # Update the iterate.
        alpha = min(alpha, alpha_bd, alpha_ub)
        if alpha > 0.0:
            step = np.clip(step + alpha * sd[:n], xl, xu)
            grad += alpha * hess_sd
            resid = np.maximum(0.0, resid - alpha * aub_sd)
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)

        if alpha < min(alpha_tr, alpha_bd, alpha_ub):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            beta = (grad_proj @ hess_sd) / curv_sd
            sd = beta * sd - grad_proj
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound/linear constraint. Add this
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            elif alpha_xu <= alpha:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            elif alpha_slack <= alpha:
                i_new = np.argmin(all_alpha_slack)
                free_slack[i_new] = False
            else:
                i_new = np.argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_normal_byrd_omojokun(
                aub, free_xl, free_xu, free_slack, free_ub
            )
            sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bound constraints to the working set to prepare for the
            # improvement of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached:
        step_base = np.copy(step)
        free_bd = free_xl & free_xu
        grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
            aeq @ step - beq
        )
        sd = np.zeros(n)
        while np.count_nonzero(free_bd) > 0:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
            step_sq = step[free_bd] @ step[free_bd]
            grad_sq = grad[free_bd] @ grad[free_bd]
            grad_step = grad[free_bd] @ step[free_bd]
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
            sd[~free_bd] = 0.0
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd[free_bd])
            ):
                break
            sd[free_bd] /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration. The step will be updated as:
            # step = cos(theta) * step + sin(theta) * sd.
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            temp_xl[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
            )
            temp_xu[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_bd + 3)
            t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
            resid_ub = np.maximum(aub @ step - bub, 0.0)
            resid_eq = aeq @ step - beq
            step_proj = np.copy(step)
            step_proj[~free_bd] = 0.0
            all_reduct = np.empty(n_samples)
            for i in range(n_samples):
                sin_value = 2.0 * t_samples[i] / (1.0 + t_samples[i] ** 2.0)
                step_alt = np.clip(
                    step + sin_value * (sd - t_samples[i] * step_proj),
                    xl,
                    xu,
                )
                resid_ub_alt = np.maximum(aub @ step_alt - bub, 0.0)
                resid_eq_alt = aeq @ step_alt - beq
                all_reduct[i] = 0.5 * (
                    resid_ub @ resid_ub
                    + resid_eq @ resid_eq
                    - resid_ub_alt @ resid_ub_alt
                    - resid_eq_alt @ resid_eq_alt
                )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            sin_value = 2.0 * t_samples[i_max] / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step[free_bd] = cos_value * step[free_bd] + sin_value * sd[free_bd]
            grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
                aeq @ step - beq
            )
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound constraints, add them
            # to the working set, and restart the alternative iteration.
            # Otherwise, the calculations are terminated.
            if t_bd < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_bd:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_bd[i_new] = False
                if t_xu <= t_bd:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_bd[i_new] = False
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
        resid_ub = np.maximum(aub @ step - bub, 0.0)
        resid_ub_base = np.maximum(aub @ step_base - bub, 0.0)
        resid_eq = aeq @ step - beq
        resid_eq_base = aeq @ step_base - beq
        if (
            resid_ub @ resid_ub + resid_eq @ resid_eq
            > resid_ub_base @ resid_ub_base + resid_eq_base @ resid_eq_base
        ):
            step = step_base

    if debug:
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub):
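    """
    Compute the number ``n_act`` of (numerically) linearly independent
    gradients of the constraints in the working set, together with an
    orthogonal matrix ``q`` whose last ``n - n_act`` columns span the null
    space of these gradients. The count is the numerical rank of the matrix
    of constraint gradients, estimated from the magnitudes of the diagonal
    of its pivoted QR factor ``r``.
    """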
    n = free_xl.size
    identity = np.eye(n)
    q, r, _ = qr(
        np.block(
            [
                [aeq],
                [aub[~free_ub, :]],
                [-identity[~free_xl, :]],
                [identity[~free_xu, :]],
            ]
        ).T,
        pivoting=True,
    )
    n_act = np.count_nonzero(
        np.abs(np.diag(r))
        >= 10.0
        * EPS
        * n
        * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
    )
    return n_act, q


def qr_normal_byrd_omojokun(aub, free_xl, free_xu, free_slack, free_ub):
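    """
    Same as `qr_tangential_byrd_omojokun`, for the working set of the normal
    subproblem. The factorization is performed in the space of the variables
    augmented with the slack variables, so that ``q`` has order
    ``n + m_linear_ub``.
    """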
    m_linear_ub, n = aub.shape
    identity_n = np.eye(n)
    identity_m = np.eye(m_linear_ub)
    q, r, _ = qr(
        np.block(
            [
                [
                    aub[~free_ub, :],
                    -identity_m[~free_ub, :],
                ],
                [
                    np.zeros((m_linear_ub - np.count_nonzero(free_slack), n)),
                    -identity_m[~free_slack, :],
                ],
                [
                    -identity_n[~free_xl, :],
                    np.zeros((n - np.count_nonzero(free_xl), m_linear_ub)),
                ],
                [
                    identity_n[~free_xu, :],
                    np.zeros((n - np.count_nonzero(free_xu), m_linear_ub)),
                ],
            ]
        ).T,
        pivoting=True,
    )
    n_act = np.count_nonzero(
        np.abs(np.diag(r))
        >= 10.0
        * EPS
        * (n + m_linear_ub)
        * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
    )
    return n_act, q


def _alpha_tr(step, sd, delta):
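    """
    Return the largest step size ``alpha >= 0`` such that ``step + alpha *
    sd`` remains in the trust region of radius `delta`. It is the positive
    root of ``sd_sq * alpha**2 + 2.0 * step_sd * alpha - dist_tr_sq = 0``,
    evaluated with whichever of its two equivalent closed forms is
    numerically safe; a `ZeroDivisionError` is raised when neither form can
    be evaluated reliably.
    """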
    step_sd = step @ sd
    sd_sq = sd @ sd
    dist_tr_sq = delta**2.0 - step @ step
    temp = np.sqrt(max(step_sd**2.0 + sd_sq * dist_tr_sq, 0.0))
    if step_sd <= 0.0 and sd_sq > TINY * abs(temp - step_sd):
        alpha_tr = max((temp - step_sd) / sd_sq, 0.0)
    elif abs(temp + step_sd) > TINY * dist_tr_sq:
        alpha_tr = max(dist_tr_sq / (temp + step_sd), 0.0)
    else:
        raise ZeroDivisionError
    return alpha_tr


def _argmax(x):
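    """Return the indices of all entries of ``x`` equal to its maximum."""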
    return np.flatnonzero(x >= np.max(x))


def _argmin(x):
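    """Return the indices of all entries of ``x`` equal to its minimum."""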
    return np.flatnonzero(x <= np.min(x))