From e4e4049824029c6f0c3184e46999d5ccb4225397 Mon Sep 17 00:00:00 2001
From: Lindon Roberts <lindon.roberts@anu.edu.au>
Date: Thu, 5 Sep 2024 15:45:15 +1000
Subject: [PATCH] First implementation of convex constraints

---
 pybobyqa/controller.py   |  32 ++++++++----
 pybobyqa/model.py        |   5 +-
 pybobyqa/solver.py       |  25 +++++----
 pybobyqa/trust_region.py | 109 +++++++++++++++++++++++++++++++++++++--
 pybobyqa/util.py         |  49 +++++++++++++++++-
 5 files changed, 196 insertions(+), 24 deletions(-)

diff --git a/pybobyqa/controller.py b/pybobyqa/controller.py
index e5dfbb9..1761e1a 100644
--- a/pybobyqa/controller.py
+++ b/pybobyqa/controller.py
@@ -94,11 +94,11 @@ def able_to_do_restart(self):
 
 
 class Controller(object):
-    def __init__(self, objfun, x0, args, f0, f0_nsamples, xl, xu, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=True):
+    def __init__(self, objfun, x0, args, f0, f0_nsamples, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=True):
         self.objfun = objfun
         self.maxfun = maxfun
         self.args = args
-        self.model = Model(npt, x0, f0, xl, xu, f0_nsamples, abs_tol=params("model.abs_tol"),
+        self.model = Model(npt, x0, f0, xl, xu, projections, f0_nsamples, abs_tol=params("model.abs_tol"),
                            precondition=params("interpolation.precondition"), do_logging=do_logging)
         self.nf = nf
         self.nx = nx
@@ -271,15 +271,24 @@ def initialise_random_directions(self, number_of_samples, num_directions, params
     def trust_region_step(self):
         # Build model for full least squares objectives
         gopt, H = self.model.build_full_model()
-        try:
-            d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
-        except ValueError:
-            # A ValueError may be raised if gopt or H have nan/inf values (issue #14)
-            # Although this should be picked up earlier, in this situation just return a zero 
-            # trust-region step, which leads to a safety step being called in the main algorithm.
+        if np.any(np.isinf(gopt)) or np.any(np.isnan(gopt)) or np.any(np.isinf(H)) or np.any(np.isnan(H)):
+            # Skip computing the step if gopt or H are badly formed
             d = np.zeros(gopt.shape)
             gnew = gopt.copy()
             crvmin = 0.0  # this usually represents 'step on trust-region boundary' but seems to be a sensible default for errors
+        else:
+            try:
+                if self.model.projections is None:
+                    d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
+                else:
+                    d, gnew, crvmin = ctrsbox(self.model.xbase, self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.model.projections, self.delta)
+            except ValueError:
+                # A ValueError may be raised if gopt or H have nan/inf values (issue #14)
+                # Although this should be picked up earlier, in this situation just return a zero
+                # trust-region step, which leads to a safety step being called in the main algorithm.
+                d = np.zeros(gopt.shape)
+                gnew = gopt.copy()
+                crvmin = 0.0  # this usually represents 'step on trust-region boundary' but seems to be a sensible default for errors
         return d, gopt, H, gnew, crvmin
 
     def geometry_step(self, knew, adelt, number_of_samples, params):
@@ -293,7 +302,12 @@ def geometry_step(self, knew, adelt, number_of_samples, params):
         
         # Solve problem: bounds are sl <= xnew <= su, and ||xnew-xopt|| <= adelt
         try:
-            xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt)
+            if np.any(np.isinf(g)) or np.any(np.isnan(g)) or np.any(np.isinf(H)) or np.any(np.isnan(H)):
+                raise ValueError
+            if self.model.projections is None:
+                xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt)
+            else:
+                xnew = ctrsbox_geometry(self.model.xbase, self.model.xopt(), c, g, H, self.model.sl, self.model.su, self.model.projections, adelt)
         except ValueError:
             # A ValueError may be raised if gopt or H have nan/inf values (issue #23)
             # Ideally this should be picked up earlier in self.model.lagrange_polynomial(...)
diff --git a/pybobyqa/model.py b/pybobyqa/model.py
index 454a8d5..ef1188d 100644
--- a/pybobyqa/model.py
+++ b/pybobyqa/model.py
@@ -36,7 +36,7 @@
 
 from .hessian import to_upper_triangular_vector
 from .trust_region import trsbox_geometry
-from .util import sumsq, model_value
+from .util import sumsq, model_value, dykstra
 
 __all__ = ['Model']
 
@@ -45,7 +45,7 @@
 
 
 class Model(object):
-    def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, precondition=True, do_logging=True):
+    def __init__(self, npt, x0, f0, xl, xu, projections, f0_nsamples, n=None, abs_tol=-1e20, precondition=True, do_logging=True):
         if n is None:
             n = len(x0)
         assert npt >= n + 1, "Require npt >= n+1 for quadratic models"
@@ -62,6 +62,7 @@ def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, prec
         self.xbase = x0.copy()
         self.sl = xl - self.xbase  # lower bound w.r.t. xbase (require xpt >= sl)
         self.su = xu - self.xbase  # upper bound w.r.t. xbase (require xpt <= su)
+        self.projections = projections  # list of projection-based constraints (excluding explicit bounds)
         self.points = np.zeros((npt, n))  # interpolation points w.r.t. xbase
 
         # Function values
diff --git a/pybobyqa/solver.py b/pybobyqa/solver.py
index 226e0d7..3097131 100644
--- a/pybobyqa/solver.py
+++ b/pybobyqa/solver.py
@@ -96,7 +96,7 @@ def __str__(self):
         return output
 
 
-def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
+def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
                diagnostic_info, scaling_changes, f0_avg_old=None, f0_nsamples_old=None, do_logging=True, print_progress=False):
     # Evaluate at x0 (keep nf, nx correct and check for f small)
     if f0_avg_old is None:
@@ -144,7 +144,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
         nx = nx_so_far
 
     # Initialise controller
-    control = Controller(objfun, x0, args, f0_avg, num_samples_run, xl, xu, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=do_logging)
+    control = Controller(objfun, x0, args, f0_avg, num_samples_run, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=do_logging)
 
     # Initialise interpolation set
     number_of_samples = max(nsamples(control.delta, control.rho, 0, nruns_so_far), 1)
@@ -665,7 +665,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
     return x, f, gradmin, hessmin, nsamples, control.nf, control.nx, nruns_so_far, exit_info, diagnostic_info
 
 
-def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
+def solve(objfun, x0, args=(), bounds=None, projections=None, npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
           objfun_has_noise=False, seek_global_minimum=False, scaling_within_bounds=False, do_logging=True, print_progress=False):
     n = len(x0)
     if type(x0) == list:
@@ -694,7 +694,11 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
     if (xl is None or xu is None) and scaling_within_bounds:
         scaling_within_bounds = False
         warnings.warn("Ignoring scaling_within_bounds=True for unconstrained problem/1-sided bounds", RuntimeWarning)
-    
+
+    if (projections is not None) and scaling_within_bounds:
+        scaling_within_bounds = False
+        warnings.warn("Ignoring scaling_within_bounds=True for problems with projections given", RuntimeWarning)
+
     exit_info = None
     if seek_global_minimum and (xl is None or xu is None):
         exit_info = ExitInformation(EXIT_INPUT_ERROR, "If seeking global minimum, must specify upper and lower bounds")
@@ -761,6 +765,9 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
     if exit_info is None and np.min(xu - xl) < 2.0 * rhobeg:
         exit_info = ExitInformation(EXIT_INPUT_ERROR, "gap between lower and upper must be at least 2*rhobeg")
 
+    if exit_info is None and projections is not None and type(projections) != list:
+        exit_info = ExitInformation(EXIT_INPUT_ERROR, "projections must be a list of functions")
+
     if maxfun <= npt:
         warnings.warn("maxfun <= npt: Are you sure your budget is large enough?", RuntimeWarning)
 
@@ -792,12 +799,12 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
         return results
 
     # Enforce lower & upper bounds on x0
-    idx = (x0 <= xl)
+    idx = (x0 < xl)
     if np.any(idx):
         warnings.warn("x0 below lower bound, adjusting", RuntimeWarning)
     x0[idx] = xl[idx]
 
-    idx = (x0 >= xu)
+    idx = (x0 > xu)
     if np.any(idx):
         warnings.warn("x0 above upper bound, adjusting", RuntimeWarning)
     x0[idx] = xu[idx]
@@ -808,7 +815,7 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
     nf = 0
     nx = 0
     xmin, fmin, gradmin, hessmin, nsamples_min, nf, nx, nruns, exit_info, diagnostic_info = \
-        solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
+        solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
                     diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress)
 
     # Hard restarts loop
@@ -829,11 +836,11 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
                      % (fmin, nf, _rhobeg, _rhoend))
         if params("restarts.hard.use_old_fk"):
             xmin2, fmin2, gradmin2, hessmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \
-                solve_main(objfun, xmin, args, xl, xu, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
+                solve_main(objfun, xmin, args, xl, xu, projections, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
                             diagnostic_info, scaling_changes, f0_avg_old=fmin, f0_nsamples_old=nsamples_min, do_logging=do_logging, print_progress=print_progress)
         else:
             xmin2, fmin2, gradmin2, hessmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \
-                solve_main(objfun, xmin, args, xl, xu, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
+                solve_main(objfun, xmin, args, xl, xu, projections, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
                            diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress)
 
         if fmin2 < fmin or np.isnan(fmin):
diff --git a/pybobyqa/trust_region.py b/pybobyqa/trust_region.py
index 183cf63..e97fb1c 100644
--- a/pybobyqa/trust_region.py
+++ b/pybobyqa/trust_region.py
@@ -9,7 +9,21 @@
     s.t.    ||d|| <= delta
             sl <= xopt + d <= su
 The other outputs: gnew is the gradient of the model at d, and crvmin has
-information about the curvature of the model at the solution.
+information about the curvature of the model at the solution:
+- If ||d||=delta, then crvmin = 0
+- If d is constrained in all directions by the box constraints, then crvmin = -1
+- Otherwise, crvmin > 0 is the smallest positive curvature seen in the Hessian, min_{k} (dk^T H dk / ||dk||^2) for iterates dk
+
+For problems with general convex constraints, we have an alternative function
+    d, gnew, crvmin = ctrsbox(xbase, xopt, g, H, sl, su, projections, delta)
+which solves the subproblem
+    min_{d}  g'*d + 0.5*d'*H*d
+    s.t.    ||d|| <= delta
+            sl <= xopt + d <= su
+            xbase + xopt + d in the intersection of all convex sets C with proj_{C} in the list 'projections'
+The outputs are the same as trsbox, but without the negative case for crvmin.
+This version is solved using projected gradient descent; see Chapter 10 of
+A. Beck, First-Order Methods in Optimization. SIAM, 2017.
 
 Notes
 ----
@@ -53,10 +67,10 @@
     USE_FORTRAN = False
 
 
-from .util import sumsq, model_value
+from .util import sumsq, model_value, dykstra, pball, pbox
 
 
-__all__ = ['trsbox', 'trsbox_geometry']
+__all__ = ['trsbox', 'trsbox_geometry', 'ctrsbox', 'ctrsbox_geometry']
 
 ZERO_THRESH = 1e-14
 
@@ -400,3 +414,92 @@ def trsbox_geometry(xbase, c, g, H, lower, upper, Delta, use_fortran=USE_FORTRAN
         return xbase + smin
     else:
         return xbase + smax
+
+
+def ctrsbox(xbase, xopt, g, H, sl, su, projections, delta, dykstra_max_iters=100, dykstra_tol=1e-10, gtol=1e-8):
+    n = xopt.size
+    assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
+    assert xbase.shape == (n,), "xbase and xopt has wrong shape (should be vector)"
+    assert g.shape == (n,), "g and xopt have incompatible sizes"
+    assert len(H.shape) == 2, "H must be a matrix"
+    assert H.shape == (n, n), "H and xopt have incompatible sizes"
+    if not np.allclose(H, H.T):
+        # Enforce symmetry
+        H = 0.5 * (H + H.T)
+        warnings.warn("Trust-region solver: fixing non-symmetric Hessian", RuntimeWarning)
+    assert sl.shape == (n,), "sl and xopt have incompatible sizes"
+    assert su.shape == (n,), "su and xopt have incompatible sizes"
+    assert delta > 0.0, "delta must be strictly positive"
+
+    # Get full list of required projections (i.e. including TR constraint and box constraints)
+    proj_tr = lambda w: pball(w, xbase + xopt, delta)
+    proj_box = lambda w: pbox(w, xbase + sl, xbase + su)
+    P = list(projections)  # make a copy of the projections list
+    P.append(proj_tr)
+    P.append(proj_box)
+
+    # Compute the joint projection into all constraints, but using increments from xopt
+    def proj(d0):
+        p = dykstra(P, xbase + xopt + d0, max_iter=dykstra_max_iters, tol=dykstra_tol)
+        # we want the step only, so we subtract xbase+xopt
+        # from the new point: proj(xk+d) - xk
+        return p - xopt - xbase
+
+    # Compute Lipschitz constant of quadratic objective, L >= ||H||_2
+    # Stepsize for projected GD will be Lk=L
+    L = np.linalg.norm(H)  # use L = ||H||_F >= ||H|_2 as valid L-smooth constant
+    L = max(L, delta / np.linalg.norm(g))  # another upper bound; if L~0 then will take steps (1/L)*g, and don't want to go beyond trust-region
+
+    MAX_LOOP_ITERS = 100 * n ** 2
+
+    d = np.zeros((n,))
+    gnew = g.copy()
+    crvmin = -1.0
+
+    # projected GD loop
+    for ii in range(MAX_LOOP_ITERS):
+        s = proj(d - (1 / L) * gnew) - d  # new step is dnew = d + s
+
+        # take the step
+        d += s
+        gnew += H.dot(s)
+
+        # update CRVMIN
+        crv = np.dot(H.dot(s), s) / sumsq(s) if sumsq(s) >= ZERO_THRESH else crvmin
+        crvmin = min(crvmin, crv) if crvmin != -1.0 else crv
+
+        # exit condition
+        if np.linalg.norm(s) <= gtol:
+            break
+
+    # Lastly, ensure the explicit bounds are hard-enforced
+    if np.linalg.norm(d) > delta:
+        d *= delta / np.linalg.norm(d)
+    d = np.maximum(np.minimum(xopt + d, su), sl) - xopt  # does not increase ||d||
+    gnew = g + H.dot(d)
+
+    if np.linalg.norm(d) >= delta - ZERO_THRESH:
+        crvmin = 0.0
+    return d, gnew, crvmin
+
+
+def ctrsbox_geometry(xbase, xopt, c, g, H, lower, upper, projections, Delta, dykstra_max_iters=100, dykstra_tol=1e-10, gtol=1e-8):
+    # Given a Lagrange polynomial defined by: L(x) = c + g' * (x - xbase) + 0.5*(x-xbase)*H*(x-xbase)
+    # Maximise |L(x)| in the feasible region + trust region - that is, solve:
+    #   max_x  abs(c + g' * (x - xopt) + 0.5*(x-xopt)*H*(x-xopt))
+    #    s.t.  lower <= x <= upper
+    #          ||x-xopt|| <= Delta
+    #          xbase+x in the intersection of all convex sets C with proj_{C} in the list 'projections'
+    # Setting s = x-xbase (or x = xopt + s), this is equivalent to:
+    #   max_s  abs(c + g' * s + 0.5*s*H*s)
+    #   s.t.   lower <= xopt + s <= upper
+    #          ||s|| <= Delta
+    #          xbase + xopt + s in the intersection of all convex sets C with proj_{C} in the list 'projections'
+    smin, gmin, crvmin = ctrsbox(xbase, xopt, g, H, lower, upper, projections, Delta,
+                                 dykstra_max_iters=dykstra_max_iters, dykstra_tol=dykstra_tol, gtol=gtol)  # minimise L(x)
+    smax, gmax, crvmax = ctrsbox(xbase, xopt, -g, -H, lower, upper, projections, Delta,
+                                 dykstra_max_iters=dykstra_max_iters, dykstra_tol=dykstra_tol, gtol=gtol)  # maximise L(x)
+    if abs(c + model_value(g, H, smin)) >= abs(c + model_value(g, H, smax)):  # take largest abs value
+        return xopt + smin
+    else:
+        return xopt + smax
\ No newline at end of file
diff --git a/pybobyqa/util.py b/pybobyqa/util.py
index cf5df10..bed86e8 100644
--- a/pybobyqa/util.py
+++ b/pybobyqa/util.py
@@ -31,7 +31,8 @@
 
 
 __all__ = ['sumsq', 'eval_objective', 'model_value', 'random_orthog_directions_within_bounds',
-           'random_directions_within_bounds', 'apply_scaling', 'remove_scaling']
+           'random_directions_within_bounds', 'apply_scaling', 'remove_scaling',
+           'dykstra', 'pball', 'pbox']
 
 module_logger = logging.getLogger(__name__) 
 
@@ -205,3 +206,49 @@ def remove_scaling(x_scaled, scaling_changes):
         return x_scaled
     shift, scale = scaling_changes
     return shift + x_scaled * scale
+
+
+def dykstra(P, x0, max_iter=100, tol=1e-10):
+    # Dykstra's algorithm for computing the projection of x0 into the intersection
+    # of several convex sets, each with its own projection operator.
+    # For more details, including the robust termination condition, see
+    # E. G. Birgin, M. Raydan. Robust Stopping Criteria for Dykstra's Algorithm.
+    # SIAM J. Scientific Computing, 26:4 (2005), pp. 1405-1414.
+
+    # Here, x0 is the point to project, and P is a list of projection functions,
+    # i.e. proj_{ith set}(x) = P[i](x)
+    x = x0.copy()
+    p = len(P)
+    y = np.zeros((p, x0.shape[0]))
+
+    n = 0
+    cI = float('inf')
+    while n < max_iter and cI >= tol:
+        cI = 0.0
+        for i in range(p):
+            # Update iterate
+            prev_x = x.copy()
+            x = P[i](prev_x - y[i,:])
+
+            # Update increment
+            prev_y = y[i, :].copy()
+            y[i, :] = x - (prev_x - prev_y)
+
+            # Stop condition
+            cI += np.linalg.norm(prev_y - y[i, :])**2
+
+        n += 1
+
+    return x
+
+
+def pball(x, c, r):
+    # Projection operator for a Euclidean ball with center c and radius r
+    # i.e. pball(x, c, r) = proj_{B(c,r)}(x)
+    return c + r/max(np.linalg.norm(x-c), r) * (x-c)
+
+
+def pbox(x, l, u):
+    # Projection operator for box constraints, l <= x <= u
+    # i.e. pbox(x, c, r) = proj_{[l,u]}(x)
+    return np.minimum(np.maximum(x, l), u)
\ No newline at end of file