Skip to content

Commit

Permalink
First implementation of convex constraints
Browse files Browse the repository at this point in the history
  • Loading branch information
lindonroberts committed Sep 5, 2024
1 parent 3a3bd50 commit e4e4049
Show file tree
Hide file tree
Showing 5 changed files with 196 additions and 24 deletions.
32 changes: 23 additions & 9 deletions pybobyqa/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,11 @@ def able_to_do_restart(self):


class Controller(object):
def __init__(self, objfun, x0, args, f0, f0_nsamples, xl, xu, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=True):
def __init__(self, objfun, x0, args, f0, f0_nsamples, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=True):
self.objfun = objfun
self.maxfun = maxfun
self.args = args
self.model = Model(npt, x0, f0, xl, xu, f0_nsamples, abs_tol=params("model.abs_tol"),
self.model = Model(npt, x0, f0, xl, xu, projections, f0_nsamples, abs_tol=params("model.abs_tol"),
precondition=params("interpolation.precondition"), do_logging=do_logging)
self.nf = nf
self.nx = nx
Expand Down Expand Up @@ -271,15 +271,24 @@ def initialise_random_directions(self, number_of_samples, num_directions, params
def trust_region_step(self):
# Build model for full least squares objectives
gopt, H = self.model.build_full_model()
try:
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
except ValueError:
# A ValueError may be raised if gopt or H have nan/inf values (issue #14)
# Although this should be picked up earlier, in this situation just return a zero
# trust-region step, which leads to a safety step being called in the main algorithm.
if np.any(np.isinf(gopt)) or np.any(np.isnan(gopt)) or np.any(np.isinf(H)) or np.any(np.isnan(H)):
# Skip computing the step if gopt or H are badly formed
d = np.zeros(gopt.shape)
gnew = gopt.copy()
crvmin = 0.0 # this usually represents 'step on trust-region boundary' but seems to be a sensible default for errors
else:
try:
if self.model.projections is None:
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
else:
d, gnew, crvmin = ctrsbox(self.model.xbase, self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.model.projections, self.delta)
except ValueError:
# A ValueError may be raised if gopt or H have nan/inf values (issue #14)
# Although this should be picked up earlier, in this situation just return a zero
# trust-region step, which leads to a safety step being called in the main algorithm.
d = np.zeros(gopt.shape)
gnew = gopt.copy()
crvmin = 0.0 # this usually represents 'step on trust-region boundary' but seems to be a sensible default for errors
return d, gopt, H, gnew, crvmin

def geometry_step(self, knew, adelt, number_of_samples, params):
Expand All @@ -293,7 +302,12 @@ def geometry_step(self, knew, adelt, number_of_samples, params):

# Solve problem: bounds are sl <= xnew <= su, and ||xnew-xopt|| <= adelt
try:
xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt)
if np.any(np.isinf(g)) or np.any(np.isnan(g)) or np.any(np.isinf(H)) or np.any(np.isnan(H)):
raise ValueError
if self.model.projections is None:
xnew = trsbox_geometry(self.model.xopt(), c, g, H, self.model.sl, self.model.su, adelt)
else:
xnew = ctrsbox_geometry(self.model.xbase, self.model.xopt(), c, g, H, self.model.sl, self.model.su, self.model.projections, adelt)
except ValueError:
# A ValueError may be raised if gopt or H have nan/inf values (issue #23)
# Ideally this should be picked up earlier in self.model.lagrange_polynomial(...)
Expand Down
5 changes: 3 additions & 2 deletions pybobyqa/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

from .hessian import to_upper_triangular_vector
from .trust_region import trsbox_geometry
from .util import sumsq, model_value
from .util import sumsq, model_value, dykstra

__all__ = ['Model']

Expand All @@ -45,7 +45,7 @@


class Model(object):
def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, precondition=True, do_logging=True):
def __init__(self, npt, x0, f0, xl, xu, projections, f0_nsamples, n=None, abs_tol=-1e20, precondition=True, do_logging=True):
if n is None:
n = len(x0)
assert npt >= n + 1, "Require npt >= n+1 for quadratic models"
Expand All @@ -62,6 +62,7 @@ def __init__(self, npt, x0, f0, xl, xu, f0_nsamples, n=None, abs_tol=-1e20, prec
self.xbase = x0.copy()
self.sl = xl - self.xbase # lower bound w.r.t. xbase (require xpt >= sl)
self.su = xu - self.xbase # upper bound w.r.t. xbase (require xpt <= su)
self.projections = projections # list of projection-based constraints (excluding explicit bounds)
self.points = np.zeros((npt, n)) # interpolation points w.r.t. xbase

# Function values
Expand Down
25 changes: 16 additions & 9 deletions pybobyqa/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def __str__(self):
return output


def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
diagnostic_info, scaling_changes, f0_avg_old=None, f0_nsamples_old=None, do_logging=True, print_progress=False):
# Evaluate at x0 (keep nf, nx correct and check for f small)
if f0_avg_old is None:
Expand Down Expand Up @@ -144,7 +144,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
nx = nx_so_far

# Initialise controller
control = Controller(objfun, x0, args, f0_avg, num_samples_run, xl, xu, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=do_logging)
control = Controller(objfun, x0, args, f0_avg, num_samples_run, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params, scaling_changes, do_logging=do_logging)

# Initialise interpolation set
number_of_samples = max(nsamples(control.delta, control.rho, 0, nruns_so_far), 1)
Expand Down Expand Up @@ -665,7 +665,7 @@ def solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns_so_f
return x, f, gradmin, hessmin, nsamples, control.nf, control.nx, nruns_so_far, exit_info, diagnostic_info


def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
def solve(objfun, x0, args=(), bounds=None, projections=None, npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
objfun_has_noise=False, seek_global_minimum=False, scaling_within_bounds=False, do_logging=True, print_progress=False):
n = len(x0)
if type(x0) == list:
Expand Down Expand Up @@ -694,7 +694,11 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
if (xl is None or xu is None) and scaling_within_bounds:
scaling_within_bounds = False
warnings.warn("Ignoring scaling_within_bounds=True for unconstrained problem/1-sided bounds", RuntimeWarning)


if (projections is not None) and scaling_within_bounds:
scaling_within_bounds = False
warnings.warn("Ignoring scaling_within_bounds=True for problems with projections given", RuntimeWarning)

exit_info = None
if seek_global_minimum and (xl is None or xu is None):
exit_info = ExitInformation(EXIT_INPUT_ERROR, "If seeking global minimum, must specify upper and lower bounds")
Expand Down Expand Up @@ -761,6 +765,9 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
if exit_info is None and np.min(xu - xl) < 2.0 * rhobeg:
exit_info = ExitInformation(EXIT_INPUT_ERROR, "gap between lower and upper must be at least 2*rhobeg")

if exit_info is None and projections is not None and type(projections) != list:
exit_info = ExitInformation(EXIT_INPUT_ERROR, "projections must be a list of functions")

if maxfun <= npt:
warnings.warn("maxfun <= npt: Are you sure your budget is large enough?", RuntimeWarning)

Expand Down Expand Up @@ -792,12 +799,12 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
return results

# Enforce lower & upper bounds on x0
idx = (x0 <= xl)
idx = (x0 < xl)
if np.any(idx):
warnings.warn("x0 below lower bound, adjusting", RuntimeWarning)
x0[idx] = xl[idx]

idx = (x0 >= xu)
idx = (x0 > xu)
if np.any(idx):
warnings.warn("x0 above upper bound, adjusting", RuntimeWarning)
x0[idx] = xu[idx]
Expand All @@ -808,7 +815,7 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
nf = 0
nx = 0
xmin, fmin, gradmin, hessmin, nsamples_min, nf, nx, nruns, exit_info, diagnostic_info = \
solve_main(objfun, x0, args, xl, xu, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress)

# Hard restarts loop
Expand All @@ -829,11 +836,11 @@ def solve(objfun, x0, args=(), bounds=None, npt=None, rhobeg=None, rhoend=1e-8,
% (fmin, nf, _rhobeg, _rhoend))
if params("restarts.hard.use_old_fk"):
xmin2, fmin2, gradmin2, hessmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \
solve_main(objfun, xmin, args, xl, xu, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
solve_main(objfun, xmin, args, xl, xu, projections, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
diagnostic_info, scaling_changes, f0_avg_old=fmin, f0_nsamples_old=nsamples_min, do_logging=do_logging, print_progress=print_progress)
else:
xmin2, fmin2, gradmin2, hessmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \
solve_main(objfun, xmin, args, xl, xu, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
solve_main(objfun, xmin, args, xl, xu, projections, npt, _rhobeg, _rhoend, maxfun, nruns, nf, nx, nsamples, params,
diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress)

if fmin2 < fmin or np.isnan(fmin):
Expand Down
109 changes: 106 additions & 3 deletions pybobyqa/trust_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,21 @@
s.t. ||d|| <= delta
sl <= xopt + d <= su
The other outputs: gnew is the gradient of the model at d, and crvmin has
information about the curvature of the model at the solution.
information about the curvature of the model at the solution:
- If ||d||=delta, then crvmin = 0
- If d is constrained in all directions by the box constraints, then crvmin = -1
- Otherwise, crvmin > 0 is the smallest positive curvature seen in the Hessian, min_{k} (dk^T H dk / ||dk||^2) for iterates dk
For problems with general convex constraints, we have an alternative function
d, gnew, crvmin = ctrsbox(xbase, xopt, g, H, sl, su, projections, delta)
which solves the subproblem
min_{d} g'*d + 0.5*d'*H*d
s.t. ||d|| <= delta
sl <= xopt + d <= su
xbase + xopt + d in the intersection of all convex sets C with proj_{C} in the list 'projections'
The outputs are the same as trsbox, but without the negative case for crvmin.
This version is solved using projected gradient descent; see Chapter 10 of
A. Beck, First-Order Methods in Optimization. SIAM, 2017.
Notes
----
Expand Down Expand Up @@ -53,10 +67,10 @@
USE_FORTRAN = False


from .util import sumsq, model_value
from .util import sumsq, model_value, dykstra, pball, pbox


__all__ = ['trsbox', 'trsbox_geometry']
__all__ = ['trsbox', 'trsbox_geometry', 'ctrsbox', 'ctrsbox_geometry']

ZERO_THRESH = 1e-14

Expand Down Expand Up @@ -400,3 +414,92 @@ def trsbox_geometry(xbase, c, g, H, lower, upper, Delta, use_fortran=USE_FORTRAN
return xbase + smin
else:
return xbase + smax


def ctrsbox(xbase, xopt, g, H, sl, su, projections, delta, dykstra_max_iters=100, dykstra_tol=1e-10, gtol=1e-8):
n = xopt.size
assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
assert xbase.shape == (n,), "xbase and xopt has wrong shape (should be vector)"
assert g.shape == (n,), "g and xopt have incompatible sizes"
assert len(H.shape) == 2, "H must be a matrix"
assert H.shape == (n, n), "H and xopt have incompatible sizes"
if not np.allclose(H, H.T):
# Enforce symmetry
H = 0.5 * (H + H.T)
warnings.warn("Trust-region solver: fixing non-symmetric Hessian", RuntimeWarning)
assert sl.shape == (n,), "sl and xopt have incompatible sizes"
assert su.shape == (n,), "su and xopt have incompatible sizes"
assert delta > 0.0, "delta must be strictly positive"

# Get full list of required projections (i.e. including TR constraint and box constraints)
proj_tr = lambda w: pball(w, xbase + xopt, delta)
proj_box = lambda w: pbox(w, xbase + sl, xbase + su)
P = list(projections) # make a copy of the projections list
P.append(proj_tr)
P.append(proj_box)

# Compute the joint projection into all constraints, but using increments from xopt
def proj(d0):
p = dykstra(P, xbase + xopt + d0, max_iter=dykstra_max_iters, tol=dykstra_tol)
# we want the step only, so we subtract xbase+xopt
# from the new point: proj(xk+d) - xk
return p - xopt - xbase

# Compute Lipschitz constant of quadratic objective, L >= ||H||_2
# Stepsize for projected GD will be Lk=L
L = np.linalg.norm(H) # use L = ||H||_F >= ||H|_2 as valid L-smooth constant
L = max(L, delta / np.linalg.norm(g)) # another upper bound; if L~0 then will take steps (1/L)*g, and don't want to go beyond trust-region

MAX_LOOP_ITERS = 100 * n ** 2

d = np.zeros((n,))
gnew = g.copy()
crvmin = -1.0

# projected GD loop
for ii in range(MAX_LOOP_ITERS):
s = proj(d - (1 / L) * gnew) - d # new step is dnew = d + s

# take the step
d += s
gnew += H.dot(s)

# update CRVMIN
crv = np.dot(H.dot(s), s) / sumsq(s) if sumsq(s) >= ZERO_THRESH else crvmin
crvmin = min(crvmin, crv) if crvmin != -1.0 else crv

# exit condition
if np.linalg.norm(s) <= gtol:
break

# Lastly, ensure the explicit bounds are hard-enforced
if np.linalg.norm(d) > delta:
d *= delta / np.linalg.norm(d)
d = np.maximum(np.minimum(xopt + d, su), sl) - xopt # does not increase ||d||
gnew = g + H.dot(d)

if np.linalg.norm(d) >= delta - ZERO_THRESH:
crvmin = 0.0
return d, gnew, crvmin


def ctrsbox_geometry(xbase, xopt, c, g, H, lower, upper, projections, Delta, dykstra_max_iters=100, dykstra_tol=1e-10, gtol=1e-8):
# Given a Lagrange polynomial defined by: L(x) = c + g' * (x - xbase) + 0.5*(x-xbase)*H*(x-xbase)
# Maximise |L(x)| in the feasible region + trust region - that is, solve:
# max_x abs(c + g' * (x - xopt) + 0.5*(x-xopt)*H*(x-xopt))
# s.t. lower <= x <= upper
# ||x-xopt|| <= Delta
# xbase+x in the intersection of all convex sets C with proj_{C} in the list 'projections'
# Setting s = x-xbase (or x = xopt + s), this is equivalent to:
# max_s abs(c + g' * s + 0.5*s*H*s)
# s.t. lower <= xopt + s <= upper
# ||s|| <= Delta
# xbase + xopt + s in the intersection of all convex sets C with proj_{C} in the list 'projections'
smin, gmin, crvmin = ctrsbox(xbase, xopt, g, H, lower, upper, projections, Delta,
dykstra_max_iters=dykstra_max_iters, dykstra_tol=dykstra_tol, gtol=gtol) # minimise L(x)
smax, gmax, crvmax = ctrsbox(xbase, xopt, -g, -H, lower, upper, projections, Delta,
dykstra_max_iters=dykstra_max_iters, dykstra_tol=dykstra_tol, gtol=gtol) # maximise L(x)
if abs(c + model_value(g, H, smin)) >= abs(c + model_value(g, H, smax)): # take largest abs value
return xopt + smin
else:
return xopt + smax
49 changes: 48 additions & 1 deletion pybobyqa/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@


__all__ = ['sumsq', 'eval_objective', 'model_value', 'random_orthog_directions_within_bounds',
'random_directions_within_bounds', 'apply_scaling', 'remove_scaling']
'random_directions_within_bounds', 'apply_scaling', 'remove_scaling',
'dykstra', 'pball', 'pbox']

module_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -205,3 +206,49 @@ def remove_scaling(x_scaled, scaling_changes):
return x_scaled
shift, scale = scaling_changes
return shift + x_scaled * scale


def dykstra(P, x0, max_iter=100, tol=1e-10):
# Dykstra's algorithm for computing the projection of x0 into the intersection
# of several convex sets, each with its own projection operator.
# For more details, including the robust termination condition, see
# E. G. Birgin, M. Raydan. Robust Stopping Criteria for Dykstra's Algorithm.
# SIAM J. Scientific Computing, 26:4 (2005), pp. 1405-1414.

# Here, x0 is the point to project, and P is a list of projection functions,
# i.e. proj_{ith set}(x) = P[i](x)
x = x0.copy()
p = len(P)
y = np.zeros((p, x0.shape[0]))

n = 0
cI = float('inf')
while n < max_iter and cI >= tol:
cI = 0.0
for i in range(p):
# Update iterate
prev_x = x.copy()
x = P[i](prev_x - y[i,:])

# Update increment
prev_y = y[i, :].copy()
y[i, :] = x - (prev_x - prev_y)

# Stop condition
cI += np.linalg.norm(prev_y - y[i, :])**2

n += 1

return x


def pball(x, c, r):
# Projection operator for a Euclidean ball with center c and radius r
# i.e. pball(x, c, r) = proj_{B(c,r)}(x)
return c + r/max(np.linalg.norm(x-c), r) * (x-c)


def pbox(x, l, u):
# Projection operator for box constraints, l <= x <= u
# i.e. pbox(x, c, r) = proj_{[l,u]}(x)
return np.minimum(np.maximum(x, l), u)

0 comments on commit e4e4049

Please sign in to comment.