From e4e4049824029c6f0c3184e46999d5ccb4225397 Mon Sep 17 00:00:00 2001 From: Lindon Roberts Date: Thu, 5 Sep 2024 15:45:15 +1000 Subject: [PATCH] First implementation of convex constraints --- pybobyqa/controller.py | 32 ++++++++---- pybobyqa/model.py | 5 +- pybobyqa/solver.py | 25 +++++---- pybobyqa/trust_region.py | 109 +++++++++++++++++++++++++++++++++++++-- pybobyqa/util.py | 49 +++++++++++++++++- 5 files changed, 196 insertions(+), 24 deletions(-) diff --git a/pybobyqa/controller.py b/pybobyqa/controller.py index e5dfbb9..1761e1a 100644 --- a/pybobyqa/controller.py +++ b/pybobyqa/controller.py @@ -94,11 +94,11 @@ def able_to_do_restart(self): class Controller(object): - def __init__(self, objfun, x0, args, f0, f0_nsamples, xl, xu, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=True): + def __init__(self, objfun, x0, args, f0, f0_nsamples, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=True): self.objfun = objfun self.maxfun = maxfun self.args = args - self.model = Model(npt, x0, f0, xl, xu, f0_nsamples, abs_tol=params("model.abs_tol"), + self.model = Model(npt, x0, f0, xl, xu, projections, f0_nsamples, abs_tol=params("model.abs_tol"), precondition=params("interpolation.precondition"), do_logging=do_logging) self.nf = nf self.nx = nx @@ -271,15 +271,24 @@ def initialise_random_directions(self, number_of_samples, num_directions, params def trust_region_step(self): # Build model for full least squares objectives gopt, H = self.model.build_full_model() - try: - d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta) - except ValueError: - # A ValueError may be raised if gopt or H have nan/inf values (issue #14) - # Although this should be picked up earlier, in this situation just return a zero - # trust-region step, which leads to a safety step being called in the main algorithm. + if np.any(np.isinf(gopt)) or np.any(np.isnan(gopt)) or np.any(np.isinf(H)) or np.any(np.isnan(H)): + # Skip computing the step if gopt or H are badly formed d = np.zeros(gopt.shape) gnew = gopt.copy() crvmin = 0.0 # this usually represents 'step on trust-region boundary' but seems to be a sensible default for errors + else: + try: + if self.model.projections is None: + d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta) + else: + d, gnew, crvmin = ctrsbox(self.model.xbase, self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.model.projections, self.delta) + except ValueError: + # A ValueError may be raised if gopt or H have nan/inf values (issue #14) + # Although this should be picked up earlier, in this situation just return a zero + # trust-region step, which leads to a safety step being called in the main algorithm. + d = np.zeros(gopt.shape) + gnew = gopt.copy() + crvmin = 0.0 # this usually represents 'step on trust-region boundary' but seems to be a sensible default for errors return d, gopt, H, gnew, crvmin def geometry_step(self, knew, adelt, number_of_samples, params): @@ -293,7 +302,12 @@ def geometry_step(self, knew, adelt, number_of_samples, params): # Solve problem: bounds are sl <= xnew <= su, and ||xnew-xopt|| <= adelt try: - xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt) + if np.any(np.isinf(g)) or np.any(np.isnan(g)) or np.any(np.isinf(H)) or np.any(np.isnan(H)): + raise ValueError + if self.model.projections is None: + xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt) + else: + xnew = ctrsbox_geometry(self.model.xbase, self.model.xopt(), c, g, H, self.model.sl, self.model.su, self.model.projections, adelt) except ValueError: # A ValueError may be raised if gopt or H have nan/inf values (issue #23) # Ideally this should be picked up earlier in self.model.lagrange_polynomial(...) diff --git a/pybobyqa/model.py b/pybobyqa/model.py index 454a8d5..ef1188d 100644 --- a/pybobyqa/model.py +++ b/pybobyqa/model.py @@ -36,7 +36,7 @@ from .hessian import to_upper_triangular_vector from .trust_region import trsbox_geometry -from .util import sumsq, model_value +from .util import sumsq, model_value, dykstra __all__ = ['Model'] @@ -45,7 +45,7 @@ class Model(object): - def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, precondition=True, do_logging=True): + def __init__(self, npt, x0, f0, xl, xu, projections, f0_nsamples, n=None, abs_tol=-1e20, precondition=True, do_logging=True): if n is None: n = len(x0) assert npt >= n + 1, "Require npt >= n+1 for quadratic models" @@ -62,6 +62,7 @@ def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, prec self.xbase = x0.copy() self.sl = xl - self.xbase # lower bound w.r.t. xbase (require xpt >= sl) self.su = xu - self.xbase # upper bound w.r.t. xbase (require xpt <= su) + self.projections = projections # list of projection-based constraints (excluding explicit bounds) self.points = np.zeros((npt, n)) # interpolation points w.r.t. xbase # Function values diff --git a/pybobyqa/solver.py b/pybobyqa/solver.py index 226e0d7..3097131 100644 --- a/pybobyqa/solver.py +++ b/pybobyqa/solver.py @@ -96,7 +96,7 @@ def __str__(self): return output -def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params, +def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params, diagnostic_info, scaling_changes, f0_avg_old=None, f0_nsamples_old=None, do_logging=True, print_progress=False): # Evaluate at x0 (keep nf, nx correct and check for f small) if f0_avg_old is None: @@ -144,7 +144,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f nx = nx_so_far # Initialise controller - control = Controller(objfun, x0, args, f0_avg, num_samples_run, xl, xu, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=do_logging) + control = Controller(objfun, x0, args, f0_avg, num_samples_run, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=do_logging) # Initialise interpolation set number_of_samples = max(nsamples(control.delta, control.rho, 0, nruns_so_far), 1) @@ -665,7 +665,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f return x, f, gradmin, hessmin, nsamples, control.nf, control.nx, nruns_so_far, exit_info, diagnostic_info -def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None, +def solve(objfun, x0, args=(), bounds=None, projections=None, npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None, objfun_has_noise=False, seek_global_minimum=False, scaling_within_bounds=False, do_logging=True, print_progress=False): n = len(x0) if type(x0) == list: @@ -694,7 +694,11 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, if (xl is None or xu is None) and scaling_within_bounds: scaling_within_bounds = False warnings.warn("Ignoring scaling_within_bounds=True for unconstrained problem/1-sided bounds", RuntimeWarning) - + + if (projections is not None) and scaling_within_bounds: + scaling_within_bounds = False + warnings.warn("Ignoring scaling_within_bounds=True for problems with projections given", RuntimeWarning) + exit_info = None if seek_global_minimum and (xl is None or xu is None): exit_info = ExitInformation(EXIT_INPUT_ERROR, "If seeking global minimum, must specify upper and lower bounds") @@ -761,6 +765,9 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, if exit_info is None and np.min(xu - xl) < 2.0 * rhobeg: exit_info = ExitInformation(EXIT_INPUT_ERROR, "gap between lower and upper must be at least 2*rhobeg") + if exit_info is None and projections is not None and type(projections) != list: + exit_info = ExitInformation(EXIT_INPUT_ERROR, "projections must be a list of functions") + if maxfun <= npt: warnings.warn("maxfun <= npt: Are you sure your budget is large enough?", RuntimeWarning) @@ -792,12 +799,12 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, return results # Enforce lower & upper bounds on x0 - idx = (x0 <= xl) + idx = (x0 < xl) if np.any(idx): warnings.warn("x0 below lower bound, adjusting", RuntimeWarning) x0[idx] = xl[idx] - idx = (x0 >= xu) + idx = (x0 > xu) if np.any(idx): warnings.warn("x0 above upper bound, adjusting", RuntimeWarning) x0[idx] = xu[idx] @@ -808,7 +815,7 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, nf = 0 nx = 0 xmin, fmin, gradmin, hessmin, nsamples_min, nf, nx, nruns, exit_info, diagnostic_info = \ - solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params, + solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params, diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress) # Hard restarts loop @@ -829,11 +836,11 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, % (fmin, nf, _rhobeg, _rhoend)) if params("restarts.hard.use_old_fk"): xmin2, fmin2, gradmin2, hessmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \ - solve_main(objfun, xmin, args, xl, xu, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params, + solve_main(objfun, xmin, args, xl, xu, projections, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params, diagnostic_info, scaling_changes, f0_avg_old=fmin, f0_nsamples_old=nsamples_min, do_logging=do_logging, print_progress=print_progress) else: xmin2, fmin2, gradmin2, hessmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \ - solve_main(objfun, xmin, args, xl, xu, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params, + solve_main(objfun, xmin, args, xl, xu, projections, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params, diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress) if fmin2 < fmin or np.isnan(fmin): diff --git a/pybobyqa/trust_region.py b/pybobyqa/trust_region.py index 183cf63..e97fb1c 100644 --- a/pybobyqa/trust_region.py +++ b/pybobyqa/trust_region.py @@ -9,7 +9,21 @@ s.t. ||d|| <= delta sl <= xopt + d <= su The other outputs: gnew is the gradient of the model at d, and crvmin has -information about the curvature of the model at the solution. +information about the curvature of the model at the solution: +- If ||d||=delta, then crvmin = 0 +- If d is constrained in all directions by the box constraints, then crvmin = -1 +- Otherwise, crvmin > 0 is the smallest positive curvature seen in the Hessian, min_{k} (dk^T H dk / ||dk||^2) for iterates dk + +For problems with general convex constraints, we have an alternative function + d, gnew, crvmin = ctrsbox(xbase, xopt, g, H, sl, su, projections, delta) +which solves the subproblem + min_{d} g'*d + 0.5*d'*H*d + s.t. ||d|| <= delta + sl <= xopt + d <= su + xbase + xopt + d in the intersection of all convex sets C with proj_{C} in the list 'projections' +The outputs are the same as trsbox, but without the negative case for crvmin. +This version is solved using projected gradient descent; see Chapter 10 of +A. Beck, First-Order Methods in Optimization. SIAM, 2017. Notes ---- @@ -53,10 +67,10 @@ USE_FORTRAN = False -from .util import sumsq, model_value +from .util import sumsq, model_value, dykstra, pball, pbox -__all__ = ['trsbox', 'trsbox_geometry'] +__all__ = ['trsbox', 'trsbox_geometry', 'ctrsbox', 'ctrsbox_geometry'] ZERO_THRESH = 1e-14 @@ -400,3 +414,92 @@ def trsbox_geometry(xbase, c, g, H, lower, upper, Delta, use_fortran=USE_FORTRAN return xbase + smin else: return xbase + smax + + +def ctrsbox(xbase, xopt, g, H, sl, su, projections, delta, dykstra_max_iters=100, dykstra_tol=1e-10, gtol=1e-8): + n = xopt.size + assert xopt.shape == (n,), "xopt has wrong shape (should be vector)" + assert xbase.shape == (n,), "xbase and xopt has wrong shape (should be vector)" + assert g.shape == (n,), "g and xopt have incompatible sizes" + assert len(H.shape) == 2, "H must be a matrix" + assert H.shape == (n, n), "H and xopt have incompatible sizes" + if not np.allclose(H, H.T): + # Enforce symmetry + H = 0.5 * (H + H.T) + warnings.warn("Trust-region solver: fixing non-symmetric Hessian", RuntimeWarning) + assert sl.shape == (n,), "sl and xopt have incompatible sizes" + assert su.shape == (n,), "su and xopt have incompatible sizes" + assert delta > 0.0, "delta must be strictly positive" + + # Get full list of required projections (i.e. including TR constraint and box constraints) + proj_tr = lambda w: pball(w, xbase + xopt, delta) + proj_box = lambda w: pbox(w, xbase + sl, xbase + su) + P = list(projections) # make a copy of the projections list + P.append(proj_tr) + P.append(proj_box) + + # Compute the joint projection into all constraints, but using increments from xopt + def proj(d0): + p = dykstra(P, xbase + xopt + d0, max_iter=dykstra_max_iters, tol=dykstra_tol) + # we want the step only, so we subtract xbase+xopt + # from the new point: proj(xk+d) - xk + return p - xopt - xbase + + # Compute Lipschitz constant of quadratic objective, L >= ||H||_2 + # Stepsize for projected GD will be Lk=L + L = np.linalg.norm(H) # use L = ||H||_F >= ||H|_2 as valid L-smooth constant + L = max(L, delta / np.linalg.norm(g)) # another upper bound; if L~0 then will take steps (1/L)*g, and don't want to go beyond trust-region + + MAX_LOOP_ITERS = 100 * n ** 2 + + d = np.zeros((n,)) + gnew = g.copy() + crvmin = -1.0 + + # projected GD loop + for ii in range(MAX_LOOP_ITERS): + s = proj(d - (1 / L) * gnew) - d # new step is dnew = d + s + + # take the step + d += s + gnew += H.dot(s) + + # update CRVMIN + crv = np.dot(H.dot(s), s) / sumsq(s) if sumsq(s) >= ZERO_THRESH else crvmin + crvmin = min(crvmin, crv) if crvmin != -1.0 else crv + + # exit condition + if np.linalg.norm(s) <= gtol: + break + + # Lastly, ensure the explicit bounds are hard-enforced + if np.linalg.norm(d) > delta: + d *= delta / np.linalg.norm(d) + d = np.maximum(np.minimum(xopt + d, su), sl) - xopt # does not increase ||d|| + gnew = g + H.dot(d) + + if np.linalg.norm(d) >= delta - ZERO_THRESH: + crvmin = 0.0 + return d, gnew, crvmin + + +def ctrsbox_geometry(xbase, xopt, c, g, H, lower, upper, projections, Delta, dykstra_max_iters=100, dykstra_tol=1e-10, gtol=1e-8): + # Given a Lagrange polynomial defined by: L(x) = c + g' * (x - xbase) + 0.5*(x-xbase)*H*(x-xbase) + # Maximise |L(x)| in the feasible region + trust region - that is, solve: + # max_x abs(c + g' * (x - xopt) + 0.5*(x-xopt)*H*(x-xopt)) + # s.t. lower <= x <= upper + # ||x-xopt|| <= Delta + # xbase+x in the intersection of all convex sets C with proj_{C} in the list 'projections' + # Setting s = x-xbase (or x = xopt + s), this is equivalent to: + # max_s abs(c + g' * s + 0.5*s*H*s) + # s.t. lower <= xopt + s <= upper + # ||s|| <= Delta + # xbase + xopt + s in the intersection of all convex sets C with proj_{C} in the list 'projections' + smin, gmin, crvmin = ctrsbox(xbase, xopt, g, H, lower, upper, projections, Delta, + dykstra_max_iters=dykstra_max_iters, dykstra_tol=dykstra_tol, gtol=gtol) # minimise L(x) + smax, gmax, crvmax = ctrsbox(xbase, xopt, -g, -H, lower, upper, projections, Delta, + dykstra_max_iters=dykstra_max_iters, dykstra_tol=dykstra_tol, gtol=gtol) # maximise L(x) + if abs(c + model_value(g, H, smin)) >= abs(c + model_value(g, H, smax)): # take largest abs value + return xopt + smin + else: + return xopt + smax \ No newline at end of file diff --git a/pybobyqa/util.py b/pybobyqa/util.py index cf5df10..bed86e8 100644 --- a/pybobyqa/util.py +++ b/pybobyqa/util.py @@ -31,7 +31,8 @@ __all__ = ['sumsq', 'eval_objective', 'model_value', 'random_orthog_directions_within_bounds', - 'random_directions_within_bounds', 'apply_scaling', 'remove_scaling'] + 'random_directions_within_bounds', 'apply_scaling', 'remove_scaling', + 'dykstra', 'pball', 'pbox'] module_logger = logging.getLogger(__name__) @@ -205,3 +206,49 @@ def remove_scaling(x_scaled, scaling_changes): return x_scaled shift, scale = scaling_changes return shift + x_scaled * scale + + +def dykstra(P, x0, max_iter=100, tol=1e-10): + # Dykstra's algorithm for computing the projection of x0 into the intersection + # of several convex sets, each with its own projection operator. + # For more details, including the robust termination condition, see + # E. G. Birgin, M. Raydan. Robust Stopping Criteria for Dykstra's Algorithm. + # SIAM J. Scientific Computing, 26:4 (2005), pp. 1405-1414. + + # Here, x0 is the point to project, and P is a list of projection functions, + # i.e. proj_{ith set}(x) = P[i](x) + x = x0.copy() + p = len(P) + y = np.zeros((p, x0.shape[0])) + + n = 0 + cI = float('inf') + while n < max_iter and cI >= tol: + cI = 0.0 + for i in range(p): + # Update iterate + prev_x = x.copy() + x = P[i](prev_x - y[i,:]) + + # Update increment + prev_y = y[i, :].copy() + y[i, :] = x - (prev_x - prev_y) + + # Stop condition + cI += np.linalg.norm(prev_y - y[i, :])**2 + + n += 1 + + return x + + +def pball(x, c, r): + # Projection operator for a Euclidean ball with center c and radius r + # i.e. pball(x, c, r) = proj_{B(c,r)}(x) + return c + r/max(np.linalg.norm(x-c), r) * (x-c) + + +def pbox(x, l, u): + # Projection operator for box constraints, l <= x <= u + # i.e. pbox(x, c, r) = proj_{[l,u]}(x) + return np.minimum(np.maximum(x, l), u) \ No newline at end of file