import inspect

import numpy as np
from scipy.linalg import qr

from ..utils import get_arrays_tol

TINY = np.finfo(float).tiny
EPS = np.finfo(float).eps


def tangential_byrd_omojokun(grad, hess_prod, xl, xu, delta, debug, **kwargs):
    r"""
    Minimize approximately a quadratic function subject to bound constraints
    in a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
        s^{\mathsf{T}} H s \quad \text{s.t.} \quad \left\{ \begin{array}{l}
        l \le s \le u\\
        \lVert s \rVert \le \Delta,
        \end{array} \right.

    using an active-set variation of the truncated conjugate gradient method.

    Parameters
    ----------
    grad : `numpy.ndarray`, shape (n,)
        Gradient :math:`g` as shown above.
    hess_prod : callable
        Product of the Hessian matrix :math:`H` with any vector.

            ``hess_prod(s) -> `numpy.ndarray`, shape (n,)``

        returns the product :math:`H s`.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to make debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.2 of [1]_. It is assumed that the
    origin is feasible with respect to the bound constraints and that `delta`
    is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
    """
    if debug:
        assert isinstance(grad, np.ndarray) and grad.ndim == 1
        assert inspect.signature(hess_prod).bind(grad)
        assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
        assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
        assert isinstance(delta, float)
        assert isinstance(debug, bool)
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= tol)
        assert np.all(xu >= -tol)
        assert np.isfinite(delta) and delta > 0.0
    xl = np.minimum(xl, 0.0)
    xu = np.maximum(xu, 0.0)

    # Copy the arrays that may be modified by the code below.
    n = grad.size
    grad = np.copy(grad)
    grad_orig = np.copy(grad)

    # Calculate the initial active set.
    free_bd = ((xl < 0.0) | (grad < 0.0)) & ((xu > 0.0) | (grad > 0.0))

    # Set the initial iterate and the initial search direction.
    step = np.zeros_like(grad)
    sd = np.zeros_like(step)
    sd[free_bd] = -grad[free_bd]

    k = 0
    reduct = 0.0
    boundary_reached = False
    while k < np.count_nonzero(free_bd):
        # Stop the computations if sd is not a descent direction.
        grad_sd = grad @ sd
        if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
            break

        # Set alpha_tr to the step size for the trust-region constraint.
        try:
            alpha_tr = _alpha_tr(step, sd, delta)
        except ZeroDivisionError:
            break

        # Stop the computations if a step along sd is expected to give a
        # relatively small reduction in the objective function.
        if -alpha_tr * grad_sd <= 1e-8 * reduct:
            break

        # Set alpha_quad to the step size for the minimization problem.
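        # Along sd, the quadratic model reads
        # alpha -> alpha * grad_sd + 0.5 * alpha**2 * curv_sd, so its
        # minimizer is -grad_sd / curv_sd when curv_sd is sufficiently
        # positive. Otherwise, the model keeps decreasing along sd, and
        # alpha_quad is set to infinity.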
        hess_sd = hess_prod(sd)
        curv_sd = sd @ hess_sd
        if curv_sd > TINY * abs(grad_sd):
            alpha_quad = max(-grad_sd / curv_sd, 0.0)
        else:
            alpha_quad = np.inf

        # Stop the computations if the reduction in the objective function
        # provided by an unconstrained step is small.
        alpha = min(alpha_tr, alpha_quad)
        if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
            break

        # Set alpha_bd to the step size for the bound constraints.
        i_xl = (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
        i_xu = (xu < np.inf) & (sd > TINY * np.abs(xu - step))
        all_alpha_xl = np.full_like(step, np.inf)
        all_alpha_xu = np.full_like(step, np.inf)
        all_alpha_xl[i_xl] = np.maximum(
            (xl[i_xl] - step[i_xl]) / sd[i_xl],
            0.0,
        )
        all_alpha_xu[i_xu] = np.maximum(
            (xu[i_xu] - step[i_xu]) / sd[i_xu],
            0.0,
        )
        alpha_xl = np.min(all_alpha_xl)
        alpha_xu = np.min(all_alpha_xu)
        alpha_bd = min(alpha_xl, alpha_xu)

        # Update the iterate.
        alpha = min(alpha, alpha_bd)
        if alpha > 0.0:
            step[free_bd] = np.clip(
                step[free_bd] + alpha * sd[free_bd],
                xl[free_bd],
                xu[free_bd],
            )
            grad += alpha * hess_sd
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
        if alpha < min(alpha_tr, alpha_bd):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            beta = (grad[free_bd] @ hess_sd[free_bd]) / curv_sd
            sd[free_bd] = beta * sd[free_bd] - grad[free_bd]
            sd[~free_bd] = 0.0
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound constraint. Add this bound
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
            else:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
            free_bd[i_new] = False
            sd[free_bd] = -grad[free_bd]
            sd[~free_bd] = 0.0
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bounds to the working set to prepare for the improvement
            # of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_bd[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_bd[i_new] = False
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached:
        step_base = np.copy(step)
        step_comparator = grad_orig @ step_base + 0.5 * step_base @ hess_prod(
            step_base
        )
        while np.count_nonzero(free_bd) > 0:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
            step_sq = step[free_bd] @ step[free_bd]
            grad_sq = grad[free_bd] @ grad[free_bd]
            grad_step = grad[free_bd] @ step[free_bd]
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
            sd[~free_bd] = 0.0
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd[free_bd])
            ):
                break
            sd[free_bd] /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration. The step will be updated as:
            # step = cos(theta) * step + sin(theta) * sd.
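            # With t = tan(0.5 * theta), sin(theta) = 2t / (1 + t**2) and
            # cos(theta) = (1 - t**2) / (1 + t**2), so each bound constraint
            # on the rotated step becomes a quadratic inequality in t. The
            # arrays all_t_xl and all_t_xu below store, for each free bound, a
            # value of t below which that bound remains satisfied, capped at
            # 1.0 (i.e., theta is at most pi / 2).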
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            temp_xl[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
            )
            temp_xu[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # Calculate some curvature information.
            hess_step = hess_prod(step)
            hess_sd = hess_prod(sd)
            curv_step = step @ hess_step
            curv_sd = sd @ hess_sd
            curv_step_sd = step @ hess_sd

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_bd + 3)
            t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
            sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
            all_reduct = sin_values * (
                grad_step * t_samples
                - grad_sd
                - t_samples * curv_step
                + sin_values
                * (t_samples * curv_step_sd - 0.5 * (curv_sd - curv_step))
            )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step[free_bd] = (
                cos_value * step[free_bd] + sin_values[i_max] * sd[free_bd]
            )
            grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound constraints, add them
            # to the working set, and restart the alternative iteration.
            # Otherwise, the calculations are terminated.
            if t_bd < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_bd:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_bd[i_new] = False
                if t_xu <= t_bd:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_bd[i_new] = False
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
        if grad_orig @ step + 0.5 * step @ hess_prod(step) > step_comparator:
            step = step_base

    if debug:
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def constrained_tangential_byrd_omojokun(
    grad,
    hess_prod,
    xl,
    xu,
    aub,
    bub,
    aeq,
    delta,
    debug,
    **kwargs,
):
    r"""
    Minimize approximately a quadratic function subject to bound and linear
    constraints in a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
        s^{\mathsf{T}} H s \quad \text{s.t.} \quad \left\{ \begin{array}{l}
        l \le s \le u,\\
        A_{\scriptscriptstyle I} s \le b_{\scriptscriptstyle I},\\
        A_{\scriptscriptstyle E} s = 0,\\
        \lVert s \rVert \le \Delta,
        \end{array} \right.

    using an active-set variation of the truncated conjugate gradient method.

    Parameters
    ----------
    grad : `numpy.ndarray`, shape (n,)
        Gradient :math:`g` as shown above.
    hess_prod : callable
        Product of the Hessian matrix :math:`H` with any vector.

            ``hess_prod(s) -> `numpy.ndarray`, shape (n,)``

        returns the product :math:`H s`.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    aub : `numpy.ndarray`, shape (m_linear_ub, n)
        Coefficient matrix :math:`A_{\scriptscriptstyle I}` as shown above.
    bub : `numpy.ndarray`, shape (m_linear_ub,)
        Right-hand side :math:`b_{\scriptscriptstyle I}` as shown above.
    aeq : `numpy.ndarray`, shape (m_linear_eq, n)
        Coefficient matrix :math:`A_{\scriptscriptstyle E}` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to make debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.3 of [1]_. It is assumed that the
    origin is feasible with respect to the bound and linear constraints, and
    that `delta` is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
    """
    if debug:
        assert isinstance(grad, np.ndarray) and grad.ndim == 1
        assert inspect.signature(hess_prod).bind(grad)
        assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
        assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
        assert (
            isinstance(aub, np.ndarray)
            and aub.ndim == 2
            and aub.shape[1] == grad.size
        )
        assert (
            isinstance(bub, np.ndarray)
            and bub.ndim == 1
            and bub.size == aub.shape[0]
        )
        assert (
            isinstance(aeq, np.ndarray)
            and aeq.ndim == 2
            and aeq.shape[1] == grad.size
        )
        assert isinstance(delta, float)
        assert isinstance(debug, bool)
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= tol)
        assert np.all(xu >= -tol)
        assert np.all(bub >= -tol)
        assert np.isfinite(delta) and delta > 0.0
    xl = np.minimum(xl, 0.0)
    xu = np.maximum(xu, 0.0)
    bub = np.maximum(bub, 0.0)

    # Copy the arrays that may be modified by the code below.
    n = grad.size
    grad = np.copy(grad)
    grad_orig = np.copy(grad)

    # Calculate the initial active set.
    free_xl = (xl < 0.0) | (grad < 0.0)
    free_xu = (xu > 0.0) | (grad > 0.0)
    free_ub = (bub > 0.0) | (aub @ grad > 0.0)
    n_act, q = qr_tangential_byrd_omojokun(
        aub,
        aeq,
        free_xl,
        free_xu,
        free_ub,
    )

    # Set the initial iterate and the initial search direction.
    step = np.zeros_like(grad)
    sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
    resid = np.copy(bub)

    k = 0
    reduct = 0.0
    boundary_reached = False
    while k < n - n_act:
        # Stop the computations if sd is not a descent direction.
        grad_sd = grad @ sd
        if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
            break

        # Set alpha_tr to the step size for the trust-region constraint.
        try:
            alpha_tr = _alpha_tr(step, sd, delta)
        except ZeroDivisionError:
            break

        # Stop the computations if a step along sd is expected to give a
        # relatively small reduction in the objective function.
        if -alpha_tr * grad_sd <= 1e-8 * reduct:
            break

        # Set alpha_quad to the step size for the minimization problem.
        hess_sd = hess_prod(sd)
        curv_sd = sd @ hess_sd
        if curv_sd > TINY * abs(grad_sd):
            alpha_quad = max(-grad_sd / curv_sd, 0.0)
        else:
            alpha_quad = np.inf

        # Stop the computations if the reduction in the objective function
        # provided by an unconstrained step is small.
        alpha = min(alpha_tr, alpha_quad)
        if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
            break

        # Set alpha_bd to the step size for the bound constraints.
        i_xl = free_xl & (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
        i_xu = free_xu & (xu < np.inf) & (sd > TINY * np.abs(xu - step))
        all_alpha_xl = np.full_like(step, np.inf)
        all_alpha_xu = np.full_like(step, np.inf)
        all_alpha_xl[i_xl] = np.maximum(
            (xl[i_xl] - step[i_xl]) / sd[i_xl],
            0.0,
        )
        all_alpha_xu[i_xu] = np.maximum(
            (xu[i_xu] - step[i_xu]) / sd[i_xu],
            0.0,
        )
        alpha_xl = np.min(all_alpha_xl)
        alpha_xu = np.min(all_alpha_xu)
        alpha_bd = min(alpha_xl, alpha_xu)

        # Set alpha_ub to the step size for the linear constraints.
        aub_sd = aub @ sd
        i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
        all_alpha_ub = np.full_like(bub, np.inf)
        all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
        alpha_ub = np.min(all_alpha_ub, initial=np.inf)

        # Update the iterate.
        alpha = min(alpha, alpha_bd, alpha_ub)
        if alpha > 0.0:
            step = np.clip(step + alpha * sd, xl, xu)
            grad += alpha * hess_sd
            resid = np.maximum(0.0, resid - alpha * aub_sd)
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
        if alpha < min(alpha_tr, alpha_bd, alpha_ub):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            beta = (grad_proj @ hess_sd) / curv_sd
            sd = beta * sd - grad_proj
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound/linear constraint. Add this
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            elif alpha_xu <= alpha:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            else:
                i_new = np.argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_tangential_byrd_omojokun(
                aub,
                aeq,
                free_xl,
                free_xu,
                free_ub,
            )
            sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bound/linear constraints to the working set to prepare
            # for the improvement of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            if alpha_ub <= alpha:
                i_new = _argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_tangential_byrd_omojokun(
                aub,
                aeq,
                free_xl,
                free_xu,
                free_ub,
            )
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached and n_act < n:
        step_base = np.copy(step)
        while n_act < n:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
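            # The search direction is chosen in the two-dimensional subspace
            # spanned by the projections of the step and the gradient onto the
            # null space of the working-set constraints. It is orthogonal to
            # the projected step and, after the normalization below, satisfies
            # grad @ sd = grad_sd.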
            step_proj = q[:, n_act:] @ (q[:, n_act:].T @ step)
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            step_sq = step_proj @ step_proj
            grad_sq = grad_proj @ grad_proj
            grad_step = grad_proj @ step_proj
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd = q[:, n_act:] @ (
                q[:, n_act:].T @ (grad_step * step - step_sq * grad)
            )
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd)
            ):
                break
            sd /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration for the bound constraints. The
            # step will be updated as:
            # step += (cos(theta) - 1) * step_proj + sin(theta) * sd.
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            temp_xl[free_xl] = sd[free_xl] ** 2.0 - dist_xl[free_xl] * (
                dist_xl[free_xl] - 2.0 * step_proj[free_xl]
            )
            temp_xu[free_xu] = sd[free_xu] ** 2.0 - dist_xu[free_xu] * (
                dist_xu[free_xu] + 2.0 * step_proj[free_xu]
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration for the linear constraints.
            temp_ub = np.zeros_like(resid)
            aub_step = aub @ step_proj
            aub_sd = aub @ sd
            temp_ub[free_ub] = aub_sd[free_ub] ** 2.0 - resid[free_ub] * (
                resid[free_ub] + 2.0 * aub_step[free_ub]
            )
            temp_ub[temp_ub > 0.0] = (
                np.sqrt(temp_ub[temp_ub > 0.0]) + aub_sd[temp_ub > 0.0]
            )
            i_ub = temp_ub > TINY * resid
            all_t_ub = np.ones_like(resid)
            all_t_ub[i_ub] = np.minimum(
                all_t_ub[i_ub],
                resid[i_ub] / temp_ub[i_ub],
            )
            t_ub = np.min(all_t_ub, initial=1.0)
            t_min = min(t_bd, t_ub)

            # Calculate some curvature information.
            hess_step = hess_prod(step_proj)
            hess_sd = hess_prod(sd)
            curv_step = step_proj @ hess_step
            curv_sd = sd @ hess_sd
            curv_step_sd = step_proj @ hess_sd

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_min + 3)
            t_samples = np.linspace(t_min / n_samples, t_min, n_samples)
            sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
            all_reduct = sin_values * (
                grad_step * t_samples
                - grad_sd
                - sin_values
                * (
                    0.5 * t_samples**2.0 * curv_step
                    - t_samples * curv_step_sd
                    + 0.5 * curv_sd
                )
            )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step = np.clip(
                step + (cos_value - 1.0) * step_proj + sin_values[i_max] * sd,
                xl,
                xu,
            )
            grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
            resid = np.maximum(
                0.0,
                resid
                - (cos_value - 1.0) * aub_step
                - sin_values[i_max] * aub_sd,
            )
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound constraints, add them
            # to the working set, and restart the alternative iteration.
            # Otherwise, the calculations are terminated.
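            # The largest sampled tangent equals t_min, so i_max equal to
            # n_samples - 1 means that the best sampled rotation goes all the
            # way to t_min, and t_min < 1.0 means that t_min is imposed by a
            # bound or linear constraint rather than by the cap
            # theta <= pi / 2.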
            if t_min < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_min:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_xl[i_new] = False
                if t_xu <= t_min:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_xu[i_new] = False
                if t_ub <= t_min:
                    i_new = _argmin(all_t_ub)
                    free_ub[i_new] = False
                n_act, q = qr_tangential_byrd_omojokun(
                    aub,
                    aeq,
                    free_xl,
                    free_xu,
                    free_ub,
                )
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
        if grad_orig @ step + 0.5 * step @ hess_prod(
            step
        ) > grad_orig @ step_base + 0.5 * step_base @ hess_prod(step_base):
            step = step_base

    if debug:
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.all(aub @ step <= bub + tol)
        assert np.all(np.abs(aeq @ step) <= tol)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def normal_byrd_omojokun(aub, bub, aeq, beq, xl, xu, delta, debug, **kwargs):
    r"""
    Minimize approximately a linear constraint violation subject to bound
    constraints in a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad \frac{1}{2} \big( \lVert \max \{
        A_{\scriptscriptstyle I} s - b_{\scriptscriptstyle I}, 0 \} \rVert^2 +
        \lVert A_{\scriptscriptstyle E} s - b_{\scriptscriptstyle E} \rVert^2
        \big) \quad \text{s.t.} \quad \left\{ \begin{array}{l}
        l \le s \le u,\\
        \lVert s \rVert \le \Delta,
        \end{array} \right.

    using a variation of the truncated conjugate gradient method.

    Parameters
    ----------
    aub : `numpy.ndarray`, shape (m_linear_ub, n)
        Matrix :math:`A_{\scriptscriptstyle I}` as shown above.
    bub : `numpy.ndarray`, shape (m_linear_ub,)
        Vector :math:`b_{\scriptscriptstyle I}` as shown above.
    aeq : `numpy.ndarray`, shape (m_linear_eq, n)
        Matrix :math:`A_{\scriptscriptstyle E}` as shown above.
    beq : `numpy.ndarray`, shape (m_linear_eq,)
        Vector :math:`b_{\scriptscriptstyle E}` as shown above.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to make debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.4 of [1]_. It is assumed that the
    origin is feasible with respect to the bound constraints and that `delta`
    is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
""" if debug: assert isinstance(aub, np.ndarray) and aub.ndim == 2 assert ( isinstance(bub, np.ndarray) and bub.ndim == 1 and bub.size == aub.shape[0] ) assert ( isinstance(aeq, np.ndarray) and aeq.ndim == 2 and aeq.shape[1] == aub.shape[1] ) assert ( isinstance(beq, np.ndarray) and beq.ndim == 1 and beq.size == aeq.shape[0] ) assert isinstance(xl, np.ndarray) and xl.shape == (aub.shape[1],) assert isinstance(xu, np.ndarray) and xu.shape == (aub.shape[1],) assert isinstance(delta, float) assert isinstance(debug, bool) tol = get_arrays_tol(xl, xu) assert np.all(xl <= tol) assert np.all(xu >= -tol) assert np.isfinite(delta) and delta > 0.0 xl = np.minimum(xl, 0.0) xu = np.maximum(xu, 0.0) # Calculate the initial active set. m_linear_ub, n = aub.shape grad = np.r_[aeq.T @ -beq, np.maximum(0.0, -bub)] free_xl = (xl < 0.0) | (grad[:n] < 0.0) free_xu = (xu > 0.0) | (grad[:n] > 0.0) free_slack = bub < 0.0 free_ub = (bub > 0.0) | (aub @ grad[:n] - grad[n:] > 0.0) n_act, q = qr_normal_byrd_omojokun( aub, free_xl, free_xu, free_slack, free_ub, ) # Calculate an upper bound on the norm of the slack variables. It is not # used in the original algorithm, but it may prevent undesired behaviors # engendered by computer rounding errors. delta_slack = np.sqrt(beq @ beq + grad[n:] @ grad[n:]) # Set the initial iterate and the initial search direction. step = np.zeros(n) sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad) resid = bub + grad[n:] k = 0 reduct = 0.0 boundary_reached = False while k < n + m_linear_ub - n_act: # Stop the computations if sd is not a descent direction. grad_sd = grad @ sd if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)): break # Set alpha_tr to the step size for the trust-region constraint. try: alpha_tr = _alpha_tr(step, sd[:n], delta) except ZeroDivisionError: alpha_tr = np.inf # Prevent undesired behaviors engendered by computer rounding errors by # considering the trust-region constraint on the slack variables. try: alpha_tr = min(alpha_tr, _alpha_tr(grad[n:], sd[n:], delta_slack)) except ZeroDivisionError: pass # Stop the computations if a step along sd is expected to give a # relatively small reduction in the objective function. if -alpha_tr * grad_sd <= 1e-8 * reduct: break # Set alpha_quad to the step size for the minimization problem. hess_sd = np.r_[aeq.T @ (aeq @ sd[:n]), sd[n:]] curv_sd = sd @ hess_sd if curv_sd > TINY * abs(grad_sd): alpha_quad = max(-grad_sd / curv_sd, 0.0) else: alpha_quad = np.inf # Stop the computations if the reduction in the objective function # provided by an unconstrained step is small. alpha = min(alpha_tr, alpha_quad) if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct: break # Set alpha_bd to the step size for the bound constraints. i_xl = free_xl & (xl > -np.inf) & (sd[:n] < -TINY * np.abs(xl - step)) i_xu = free_xu & (xu < np.inf) & (sd[:n] > TINY * np.abs(xu - step)) i_slack = free_slack & (sd[n:] < -TINY * np.abs(grad[n:])) all_alpha_xl = np.full_like(step, np.inf) all_alpha_xu = np.full_like(step, np.inf) all_alpha_slack = np.full_like(bub, np.inf) all_alpha_xl[i_xl] = np.maximum( (xl[i_xl] - step[i_xl]) / sd[:n][i_xl], 0.0, ) all_alpha_xu[i_xu] = np.maximum( (xu[i_xu] - step[i_xu]) / sd[:n][i_xu], 0.0, ) all_alpha_slack[i_slack] = np.maximum( -grad[n:][i_slack] / sd[n:][i_slack], 0.0, ) alpha_xl = np.min(all_alpha_xl) alpha_xu = np.min(all_alpha_xu) alpha_slack = np.min(all_alpha_slack, initial=np.inf) alpha_bd = min(alpha_xl, alpha_xu, alpha_slack) # Set alpha_ub to the step size for the linear constraints. 
        aub_sd = aub @ sd[:n] - sd[n:]
        i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
        all_alpha_ub = np.full_like(bub, np.inf)
        all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
        alpha_ub = np.min(all_alpha_ub, initial=np.inf)

        # Update the iterate.
        alpha = min(alpha, alpha_bd, alpha_ub)
        if alpha > 0.0:
            step = np.clip(step + alpha * sd[:n], xl, xu)
            grad += alpha * hess_sd
            resid = np.maximum(0.0, resid - alpha * aub_sd)
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)
        if alpha < min(alpha_tr, alpha_bd, alpha_ub):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            beta = (grad_proj @ hess_sd) / curv_sd
            sd = beta * sd - grad_proj
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound/linear constraint. Add this
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            elif alpha_xu <= alpha:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            elif alpha_slack <= alpha:
                i_new = np.argmin(all_alpha_slack)
                free_slack[i_new] = False
            else:
                i_new = np.argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_normal_byrd_omojokun(
                aub,
                free_xl,
                free_xu,
                free_slack,
                free_ub,
            )
            sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bound constraints to the working set to prepare for the
            # improvement of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached:
        step_base = np.copy(step)
        free_bd = free_xl & free_xu
        grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
            aeq @ step - beq
        )
        sd = np.zeros(n)
        while np.count_nonzero(free_bd) > 0:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
            step_sq = step[free_bd] @ step[free_bd]
            grad_sq = grad[free_bd] @ grad[free_bd]
            grad_step = grad[free_bd] @ step[free_bd]
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
            sd[~free_bd] = 0.0
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd[free_bd])
            ):
                break
            sd[free_bd] /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration. The step will be updated as:
            # step = cos(theta) * step + sin(theta) * sd.
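            # The largest admissible tangents for the bound constraints are
            # computed as in the tangential subproblem solvers above. The
            # reduction in the constraint violation is then evaluated by
            # recomputing the residuals of the inequality and equality
            # constraints at each sampled step, rather than from curvature
            # information.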
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            temp_xl[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
            )
            temp_xu[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_bd + 3)
            t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
            resid_ub = np.maximum(aub @ step - bub, 0.0)
            resid_eq = aeq @ step - beq
            step_proj = np.copy(step)
            step_proj[~free_bd] = 0.0
            all_reduct = np.empty(n_samples)
            for i in range(n_samples):
                sin_value = 2.0 * t_samples[i] / (1.0 + t_samples[i] ** 2.0)
                step_alt = np.clip(
                    step + sin_value * (sd - t_samples[i] * step_proj),
                    xl,
                    xu,
                )
                resid_ub_alt = np.maximum(aub @ step_alt - bub, 0.0)
                resid_eq_alt = aeq @ step_alt - beq
                all_reduct[i] = 0.5 * (
                    resid_ub @ resid_ub
                    + resid_eq @ resid_eq
                    - resid_ub_alt @ resid_ub_alt
                    - resid_eq_alt @ resid_eq_alt
                )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            sin_value = 2.0 * t_samples[i_max] / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step[free_bd] = (
                cos_value * step[free_bd] + sin_value * sd[free_bd]
            )
            grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
                aeq @ step - beq
            )
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound constraints, add them
            # to the working set, and restart the alternative iteration.
            # Otherwise, the calculations are terminated.
            if t_bd < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_bd:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_bd[i_new] = False
                if t_xu <= t_bd:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_bd[i_new] = False
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
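        # The comparison below is made on the constraint violation itself,
        # computed from the residuals of the inequality and equality
        # constraints at the candidate step and at the step obtained before
        # the alternative iterations.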
        resid_ub = np.maximum(aub @ step - bub, 0.0)
        resid_ub_base = np.maximum(aub @ step_base - bub, 0.0)
        resid_eq = aeq @ step - beq
        resid_eq_base = aeq @ step_base - beq
        if (
            resid_ub @ resid_ub + resid_eq @ resid_eq
            > resid_ub_base @ resid_ub_base + resid_eq_base @ resid_eq_base
        ):
            step = step_base

    if debug:
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub):
    n = free_xl.size
    identity = np.eye(n)
    q, r, _ = qr(
        np.block(
            [
                [aeq],
                [aub[~free_ub, :]],
                [-identity[~free_xl, :]],
                [identity[~free_xu, :]],
            ]
        ).T,
        pivoting=True,
    )
    n_act = np.count_nonzero(
        np.abs(np.diag(r))
        >= 10.0
        * EPS
        * n
        * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
    )
    return n_act, q


def qr_normal_byrd_omojokun(aub, free_xl, free_xu, free_slack, free_ub):
    m_linear_ub, n = aub.shape
    identity_n = np.eye(n)
    identity_m = np.eye(m_linear_ub)
    q, r, _ = qr(
        np.block(
            [
                [
                    aub[~free_ub, :],
                    -identity_m[~free_ub, :],
                ],
                [
                    np.zeros((m_linear_ub - np.count_nonzero(free_slack), n)),
                    -identity_m[~free_slack, :],
                ],
                [
                    -identity_n[~free_xl, :],
                    np.zeros((n - np.count_nonzero(free_xl), m_linear_ub)),
                ],
                [
                    identity_n[~free_xu, :],
                    np.zeros((n - np.count_nonzero(free_xu), m_linear_ub)),
                ],
            ]
        ).T,
        pivoting=True,
    )
    n_act = np.count_nonzero(
        np.abs(np.diag(r))
        >= 10.0
        * EPS
        * (n + m_linear_ub)
        * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
    )
    return n_act, q


def _alpha_tr(step, sd, delta):
    step_sd = step @ sd
    sd_sq = sd @ sd
    dist_tr_sq = delta**2.0 - step @ step
    temp = np.sqrt(max(step_sd**2.0 + sd_sq * dist_tr_sq, 0.0))
    if step_sd <= 0.0 and sd_sq > TINY * abs(temp - step_sd):
        alpha_tr = max((temp - step_sd) / sd_sq, 0.0)
    elif abs(temp + step_sd) > TINY * dist_tr_sq:
        alpha_tr = max(dist_tr_sq / (temp + step_sd), 0.0)
    else:
        raise ZeroDivisionError
    return alpha_tr


def _argmax(x):
    return np.flatnonzero(x >= np.max(x))


def _argmin(x):
    return np.flatnonzero(x <= np.min(x))
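

# The following sketch illustrates how `tangential_byrd_omojokun` may be
# called on a small quadratic problem. It is only an example: the Hessian,
# gradient, and bounds below are made up, and the guard keeps the example from
# running when this module is imported (because of the relative import above,
# the module must be executed as part of its package for the example to run).
if __name__ == "__main__":  # pragma: no cover
    hess_example = np.array([[4.0, 1.0], [1.0, 2.0]])
    grad_example = np.array([-1.0, -1.0])
    xl_example = np.array([-2.0, -0.1])
    xu_example = np.array([0.5, 2.0])
    step_example = tangential_byrd_omojokun(
        grad_example,
        lambda s: hess_example @ s,
        xl_example,
        xu_example,
        1.0,
        False,
    )
    print("Approximate tangential step:", step_example)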