diff --git a/.github/workflows/rust-clippy.yml b/.github/workflows/rust-clippy.yml index 686be34..82c9a30 100644 --- a/.github/workflows/rust-clippy.yml +++ b/.github/workflows/rust-clippy.yml @@ -28,7 +28,7 @@ jobs: actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install Rust toolchain uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af #@v1 diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index d54fa2f..000bb2c 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build run: cargo build --verbose - name: Run tests diff --git a/Cargo.toml b/Cargo.toml index 43db2b6..b380a43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -blas-src = { version = "*", features=["openblas"]} +blas-src = { version = "*", features = ["openblas"] } cblas = "*" num = "*" log = "*" @@ -16,6 +16,8 @@ float_eq = "*" rand = "*" criterion = { version = "*", features = ["html_reports"] } +# Benchmarks with Criterion + [[bench]] name = "bfgs" harness = false @@ -31,3 +33,7 @@ harness = false [[bench]] name = "bfgs_backtracking" harness = false + +[[bench]] +name = "gradient_descent" +harness = false diff --git a/benches/bench_functions.rs b/benches/bench_functions.rs new file mode 100644 index 0000000..f49af12 --- /dev/null +++ b/benches/bench_functions.rs @@ -0,0 +1,10 @@ +// Note: allow(unused) flag to avoid compiler warning + +#[allow(unused)] +// Global minimum: [0., 0.., ...] +pub fn sphere(x: &[f64], _g: &[f64], f: &mut f64, _d: i32) { + *f = 0.; + for v in x { + *f += v * v; + } +} diff --git a/benches/bfgs.rs b/benches/bfgs.rs index 8482744..8d3045d 100644 --- a/benches/bfgs.rs +++ b/benches/bfgs.rs @@ -1,16 +1,11 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; use bfgs::settings::MinimizationAlg; -// Global minimum: [0., 0.., ...] -pub fn sphere(x: &Vec, _g: &Vec, f: &mut f64, d: i32) { - *f = 0.; - for i in 0..d as usize { - *f += x[i] * x[i]; - } -} +mod bench_functions; -fn bench_bfgs(c: &mut Criterion) { +use bench_functions::sphere; +fn bench_bfgs(c: &mut Criterion) { let dims = vec![2, 6, 20, 60, 200]; let mut settings: bfgs::settings::Settings = Default::default(); diff --git a/benches/bfgs_backtracking.rs b/benches/bfgs_backtracking.rs index dfba71c..36be30e 100644 --- a/benches/bfgs_backtracking.rs +++ b/benches/bfgs_backtracking.rs @@ -2,15 +2,11 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion, Benchmark use bfgs::settings::{LineSearchAlg, MinimizationAlg}; // Global minimum: [0., 0.., ...] -pub fn sphere(x: &Vec, _g: &Vec, f: &mut f64, d: i32) { - *f = 0.; - for i in 0..d as usize { - *f += x[i] * x[i]; - } -} +mod bench_functions; -fn bench_bfgs_backtracking(c: &mut Criterion) { +use bench_functions::sphere; +fn bench_bfgs_backtracking(c: &mut Criterion) { let dims = vec![2, 6, 20, 60, 200]; let mut settings: bfgs::settings::Settings = Default::default(); diff --git a/benches/gradient_descent.rs b/benches/gradient_descent.rs new file mode 100644 index 0000000..59d9eb0 --- /dev/null +++ b/benches/gradient_descent.rs @@ -0,0 +1,26 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; +use bfgs::settings::MinimizationAlg; + +mod bench_functions; + +use bench_functions::sphere; + +fn bench_gradient_descent(c: &mut Criterion) { + let dims = vec![2, 6, 20, 60, 200]; + + let mut settings: bfgs::settings::Settings = Default::default(); + settings.minimization = MinimizationAlg::GradientDescent; + + let mut group = c.benchmark_group("gradient_descent"); + + for d in dims { + group.bench_with_input(BenchmarkId::from_parameter(d), &d, |b, &d| { + b.iter(|| bfgs::get_minimum(&sphere, black_box(&mut vec![1.7; d]), &settings)); + }); + } + + group.finish(); +} + +criterion_group!(benches, bench_gradient_descent); +criterion_main!(benches); diff --git a/benches/lbfgs.rs b/benches/lbfgs.rs index 91d2a05..79608a5 100644 --- a/benches/lbfgs.rs +++ b/benches/lbfgs.rs @@ -1,16 +1,11 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; use bfgs::settings::MinimizationAlg; -// Global minimum: [0., 0.., ...] -pub fn sphere(x: &Vec, _g: &Vec, f: &mut f64, d: i32) { - *f = 0.; - for i in 0..d as usize { - *f += x[i] * x[i]; - } -} +mod bench_functions; -fn bench_lbfgs(c: &mut Criterion) { +use bench_functions::sphere; +fn bench_lbfgs(c: &mut Criterion) { let dims = vec![2, 6, 20, 60, 200]; let mut settings: bfgs::settings::Settings = Default::default(); diff --git a/benches/lbfgs_backtracking.rs b/benches/lbfgs_backtracking.rs index 8c8a09d..d1d2880 100644 --- a/benches/lbfgs_backtracking.rs +++ b/benches/lbfgs_backtracking.rs @@ -1,16 +1,11 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId}; use bfgs::settings::{LineSearchAlg, MinimizationAlg}; -// Global minimum: [0., 0.., ...] -pub fn sphere(x: &Vec, _g: &Vec, f: &mut f64, d: i32) { - *f = 0.; - for i in 0..d as usize { - *f += x[i] * x[i]; - } -} +mod bench_functions; -fn bench_lbfgs_backtracking(c: &mut Criterion) { +use bench_functions::sphere; +fn bench_lbfgs_backtracking(c: &mut Criterion) { let dims = vec![2, 6, 20, 60, 200]; let mut settings: bfgs::settings::Settings = Default::default(); diff --git a/src/bfgs.rs b/src/bfgs.rs index 64c3aec..d18de16 100644 --- a/src/bfgs.rs +++ b/src/bfgs.rs @@ -1,8 +1,8 @@ use crate::{exit_condition, line_search}; use crate::settings::Settings; -#[allow(non_snake_case)] -fn Hessian(H: &mut Vec, s: &Vec, y: &Vec, I: &Vec, B: &mut Vec, C: &mut Vec, +#[allow(non_snake_case, clippy::too_many_arguments)] +fn Hessian(H: &mut [f64], s: &[f64], y: &[f64], I: &[f64], B: &mut Vec, C: &mut Vec, d: i32, layout: cblas::Layout, part: cblas::Part) { let rho: f64 = 1. / unsafe { cblas::ddot(d, y, 1, s, 1) }; @@ -25,11 +25,11 @@ fn Hessian(H: &mut Vec, s: &Vec, y: &Vec, I: &Vec, B: &mut V } #[allow(non_snake_case)] -pub fn bfgs(fn_function: &Ef, fn_gradient: &Gf, x: &mut Vec, settings: &Settings) - -> Option +pub fn bfgs(fn_function: &Function, fn_gradient: &Gradient, x: &mut Vec, settings: &Settings) + -> Option where - Ef: Fn(&Vec, &Vec, &mut f64, i32), - Gf: Fn(&Vec, &mut Vec, &f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) { // Settings let iter_max = settings.iter_max; @@ -62,7 +62,7 @@ pub fn bfgs(fn_function: &Ef, fn_gradient: &Gf, x: &mut Vec, settin for i in 0..d { I[(i * (d + 1)) as usize] = 1.; } - unsafe { cblas::dcopy(d * d, &*I, 1, &mut *H, 1); } + unsafe { cblas::dcopy(d * d, &I, 1, &mut H, 1); } // Search direction @@ -96,35 +96,35 @@ pub fn bfgs(fn_function: &Ef, fn_gradient: &Gf, x: &mut Vec, settin k += 1; // Store current values - unsafe { cblas::dcopy(d, &*x, 1, &mut *s, 1); } - unsafe { cblas::dcopy(d, &*g, 1, &mut *y, 1); } + unsafe { cblas::dcopy(d, &*x, 1, &mut s, 1); } + unsafe { cblas::dcopy(d, &g, 1, &mut y, 1); } f_old = f; // Store current values - unsafe { cblas::dsymv(layout, part, d, -1., &*H, d, &*g, 1, 0., &mut *p, 1); } + unsafe { cblas::dsymv(layout, part, d, -1., &H, d, &g, 1, 0., &mut p, 1); } // Save the value of Phi_0 to be used for both line_search - let phi_0: line_search::Point = line_search::Point { a: 0., f: f, d: unsafe { cblas::ddot(d, &*g, 1, &mut *p, 1) } }; + let phi_0: line_search::Point = line_search::Point { a: 0., f, d: unsafe { cblas::ddot(d, &g, 1, &p, 1) } }; // Perform line search (updating a) - if !line_search::line_search(&fn_function, &fn_gradient, &phi_0, &p, x, &mut x_new, &mut g, &mut f, &mut a, d, k, &settings, &mut eval) { + if !line_search::line_search(&fn_function, &fn_gradient, &phi_0, &p, x, &mut x_new, &mut g, &mut f, &mut a, d, k, settings, &mut eval) { eprintln!("ERROR: Line search not converging"); return None; } // Update x with the new values of a - unsafe { cblas::dcopy(d, &*x_new, 1, &mut *x, 1); } + unsafe { cblas::dcopy(d, &x_new, 1, &mut *x, 1); } // Compute -s and -y - unsafe { cblas::daxpy(d, -1., &*x, 1, &mut *s, 1); } - unsafe { cblas::daxpy(d, -1., &*g, 1, &mut *y, 1); } + unsafe { cblas::daxpy(d, -1., &*x, 1, &mut s, 1); } + unsafe { cblas::daxpy(d, -1., &g, 1, &mut y, 1); } // Normalize the Hessian at first iteration if k == 1 { - let ynorm: f64 = unsafe { cblas::dnrm2(d, &*y, 1) }; + let ynorm: f64 = unsafe { cblas::dnrm2(d, &y, 1) }; for i in 0..d { H[(i * (d + 1)) as usize] *= - unsafe { cblas::ddot(d, &*y, 1, &*s, 1) } / (ynorm * ynorm); + unsafe { cblas::ddot(d, &y, 1, &s, 1) } / (ynorm * ynorm); } } @@ -136,7 +136,7 @@ pub fn bfgs(fn_function: &Ef, fn_gradient: &Gf, x: &mut Vec, settin }; // Exit condition - if !exit_condition::evaluate(&x, &g, f, f_old, d, &settings) { + if !exit_condition::evaluate(x, &g, f, f_old, d, settings) { break; } } diff --git a/src/exit_condition.rs b/src/exit_condition.rs index d40b296..6a828b0 100644 --- a/src/exit_condition.rs +++ b/src/exit_condition.rs @@ -1,6 +1,6 @@ use crate::settings::Settings; -pub(crate) fn evaluate(x: &Vec, g: &Vec, f: f64, f_old: f64, d: i32, settings: &Settings) -> bool { +pub(crate) fn evaluate(x: &[f64], g: &[f64], f: f64, f_old: f64, d: i32, settings: &Settings) -> bool { let g_norm = unsafe { cblas::dnrm2(d, g, 1) }; let x_norm = unsafe { cblas::dnrm2(d, x, 1) }; diff --git a/src/gradient_descent.rs b/src/gradient_descent.rs new file mode 100644 index 0000000..7283456 --- /dev/null +++ b/src/gradient_descent.rs @@ -0,0 +1,68 @@ +use crate::Settings; + +pub(crate) fn gradient_descent(fn_function: &Function, fn_gradient: &Gradient, + x: &mut Vec, settings: &Settings) + -> Option + where + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) +{ + // Settings + let iter_max = settings.iter_max; + let verbose = settings.verbose; + + // Get the dimension + let d = x.len() as i32; + + // Function update evaluations + let mut eval: usize = 0; + + // Energy definition + let mut f: f64 = 0.; + + // Gradient definition + let mut g: Vec = vec![0.; d as usize]; + + // Update energy and gradient + fn_function(x, &g, &mut f, d); + fn_gradient(x, &mut g, &f, d); + eval += 1; + + // Iteration counter + let mut iter: usize = 0; + + // Learning rate + let alpha = 0.1; + + // Iteration + while iter < iter_max { + // Update the iteration counter + iter += 1; + + // Update the position + unsafe { cblas::daxpy(d, -alpha, &g, 1, &mut *x, 1); } + + // Update energy and gradient + fn_function(x, &g, &mut f, d); + fn_gradient(x, &mut g, &f, d); + eval += 1; + + let g_norm = unsafe { cblas::dnrm2(d, &g, 1) }; + if g_norm < settings.gtol { + if verbose { + println!("Exit condition reached:"); + println!(" - Iterations: {} ", iter); + println!(" - Function evaluations: {}", eval); + } + return Some(f); + } + } + + if verbose { + println!("Maximum number of iterations reached"); + } + + None +} + + diff --git a/src/lbfgs.rs b/src/lbfgs.rs index 2de493f..de76a25 100644 --- a/src/lbfgs.rs +++ b/src/lbfgs.rs @@ -9,7 +9,7 @@ mod lbfgs_deque; /// Jorge Nocedal Stephen J. Wright "Numerical Optimization" (2nd Edition) /// Algorithm 7.4 (L-BFGS two-loop recursion) #[allow(non_snake_case)] -fn search_direction(history: &VecDeque, p: &mut Vec, g: &Vec, H: &Vec, alpha: &mut Vec, d: i32, settings: &Settings) { +fn search_direction(history: &VecDeque, p: &mut [f64], g: &[f64], H: &[f64], alpha: &mut [f64], d: i32, settings: &Settings) { let m = history.len(); // If no points in history, estimate the Hessian @@ -29,21 +29,21 @@ fn search_direction(history: &VecDeque, p: &mut Vec, g: &Vec< for i in 0..m { let h = &history[i]; alpha[i] = unsafe { - -cblas::ddot(d, &*h.s, 1, p, 1) / - cblas::ddot(d, &*h.y, 1, &*h.s, 1) + -cblas::ddot(d, &h.s, 1, p, 1) / + cblas::ddot(d, &h.y, 1, &h.s, 1) }; - unsafe { cblas::daxpy(d, alpha[i], &*h.y, 1, p, 1); } + unsafe { cblas::daxpy(d, alpha[i], &h.y, 1, p, 1); } } // Compute first version of p with gamma // If M1QN3 is not used, the "Hessian" is considered a scalar if !settings.m1qn3 { let h = &history[0]; - let y = unsafe { cblas::dnrm2(d, &*h.y, 1) }; - unsafe { cblas::dscal(d, -cblas::ddot(d, &*h.y, 1, &*h.s, 1) / (y * y), p, 1); } + let y = unsafe { cblas::dnrm2(d, &h.y, 1) }; + unsafe { cblas::dscal(d, -cblas::ddot(d, &h.y, 1, &h.s, 1) / (y * y), p, 1); } } else { for i in 0..d { - p[i as usize] = -H[i as usize] * p[i as usize]; + p[i as usize] *= -H[i as usize]; } } @@ -51,10 +51,10 @@ fn search_direction(history: &VecDeque, p: &mut Vec, g: &Vec< for i in 0..m { let h = &history[m - 1 - i]; beta = unsafe { - cblas::ddot(d, &*h.y, 1, p, 1) / - cblas::ddot(d, &*h.y, 1, &*h.s, 1) + cblas::ddot(d, &h.y, 1, p, 1) / + cblas::ddot(d, &h.y, 1, &h.s, 1) }; - unsafe { cblas::daxpy(d, -beta + alpha[m - 1 - i], &*h.s, 1, p, 1); } + unsafe { cblas::daxpy(d, -beta + alpha[m - 1 - i], &h.s, 1, p, 1); } } } @@ -65,7 +65,7 @@ fn search_direction(history: &VecDeque, p: &mut Vec, g: &Vec< /// Hessian. Since we are using a vector to store the diagonal part, we can not use blas /// for all computations. #[allow(non_snake_case)] -fn Hessian(H: &mut Vec, s: &Vec, y: &Vec, d: i32) { +fn Hessian(H: &mut [f64], s: &[f64], y: &[f64], d: i32) { let mut dinvss = 0.; let mut dyy = 0.; @@ -90,8 +90,8 @@ fn Hessian(H: &mut Vec, s: &Vec, y: &Vec, d: i32) { pub fn lbfgs(fn_function: &Function, fn_gradient: &Gradient, x: &mut Vec, settings: &Settings) -> Option where - Function: Fn(&Vec, &Vec, &mut f64, i32), - Gradient: Fn(&Vec, &mut Vec, &f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) { // Settings let iter_max = settings.iter_max; @@ -152,28 +152,28 @@ pub fn lbfgs(fn_function: &Function, fn_gradient: &Gradient, let mut y: Vec = vec![0.; d as usize]; // Store current values - unsafe { cblas::dcopy(d, &*x, 1, &mut *s, 1); } - unsafe { cblas::dcopy(d, &*g, 1, &mut *y, 1); } + unsafe { cblas::dcopy(d, &*x, 1, &mut s, 1); } + unsafe { cblas::dcopy(d, &g, 1, &mut y, 1); } f_old = f; // Store current values - search_direction(&history, &mut p, &g, &H, &mut alpha, d, &settings); + search_direction(&history, &mut p, &g, &H, &mut alpha, d, settings); // Save the value of Phi_0 to be used for both line_search - let phi_0: line_search::Point = line_search::Point { a: 0., f: f, d: unsafe { cblas::ddot(d, &*g, 1, &mut *p, 1) } }; + let phi_0: line_search::Point = line_search::Point { a: 0., f, d: unsafe { cblas::ddot(d, &g, 1, &p, 1) } }; // Perform line search (updating a) - if !line_search::line_search(&fn_function, &fn_gradient, &phi_0, &p, x, &mut x_new, &mut g, &mut f, &mut a, d, k, &settings, &mut eval) { + if !line_search::line_search(&fn_function, &fn_gradient, &phi_0, &p, x, &mut x_new, &mut g, &mut f, &mut a, d, k, settings, &mut eval) { eprintln!("ERROR: Line search not converging"); return None; } // Update x with the new values of a - unsafe { cblas::dcopy(d, &*x_new, 1, &mut *x, 1); } + unsafe { cblas::dcopy(d, &x_new, 1, &mut *x, 1); } // Compute -s and -y - unsafe { cblas::daxpy(d, -1., &*x, 1, &mut *s, 1); } - unsafe { cblas::daxpy(d, -1., &*g, 1, &mut *y, 1); } + unsafe { cblas::daxpy(d, -1., &*x, 1, &mut s, 1); } + unsafe { cblas::daxpy(d, -1., &g, 1, &mut y, 1); } // Compute the Hessian if settings.m1qn3 { @@ -185,10 +185,10 @@ pub fn lbfgs(fn_function: &Function, fn_gradient: &Gradient, }; // Store in deque - fifo_operation(&mut history, s, y, &settings); + fifo_operation(&mut history, s, y, settings); // Exit condition - if !exit_condition::evaluate(&x, &g, f, f_old, d, &settings) { + if !exit_condition::evaluate(x, &g, f, f_old, d, settings) { break; } } diff --git a/src/lbfgs/lbfgs_deque.rs b/src/lbfgs/lbfgs_deque.rs index 427ca17..fdfa232 100644 --- a/src/lbfgs/lbfgs_deque.rs +++ b/src/lbfgs/lbfgs_deque.rs @@ -2,12 +2,12 @@ use std::collections::VecDeque; use crate::settings::Settings; pub(super) struct HistoryPoint { - pub(super) s : Vec, - pub(super) y : Vec + pub(super) s: Vec, + pub(super) y: Vec, } -pub(super) fn fifo_operation (q : &mut VecDeque, s : Vec, y : Vec, settings: &Settings) { - q.push_front(HistoryPoint{s, y}); +pub(super) fn fifo_operation(q: &mut VecDeque, s: Vec, y: Vec, settings: &Settings) { + q.push_front(HistoryPoint { s, y }); if q.len() > settings.history_depth { q.pop_back(); } diff --git a/src/lib.rs b/src/lib.rs index b8ea474..10e5ea4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ mod lbfgs; mod line_search; mod exit_condition; mod log; +mod gradient_descent; use crate::settings::Settings; use crate::settings::MinimizationAlg; @@ -41,18 +42,47 @@ use crate::settings::MinimizationAlg; /// The minimum energy value if the algorithm converges within the /// maximum number of iterations specified in the `settings`. Returns /// `None` if the algorithm does not converge. +/// +/// # Examples +/// +/// ```no_run +/// // Import r-bfgs library +/// use bfgs; +/// use bfgs::settings::{LineSearchAlg, MinimizationAlg}; +/// +/// // Create the settings with default parameters +/// let mut settings: bfgs::settings::Settings = Default::default(); +/// // And eventually change some of the settings +/// settings.minimization = MinimizationAlg::Lbfgs; +/// settings.line_search = LineSearchAlg::Backtracking; +/// +/// // Function to be minimized +/// let function = |x: &[f64], g: &[f64], f: &mut f64, d: i32| { +/// *f = 0.; +/// for v in x { +/// *f += v * v; +/// } +/// }; +/// +/// // Set the starting point +/// let mut x = vec![0., -1.]; +/// // Find the minimum +/// let result = bfgs::get_minimum(&function, &mut x, &settings); +/// // Check if the result is found +/// assert_ne!(result, None, "Result not found"); +/// ``` #[allow(non_snake_case)] pub fn get_minimum(fn_function: &Function, x: &mut Vec, settings: &Settings) - -> Option + -> Option where - Function: Fn(&Vec, &Vec, &mut f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32) { // Default gradient - let gf = |x: &Vec, g: &mut Vec, _f: &f64, d: i32| { + let gf = |x: &[f64], g: &mut [f64], _f: &f64, d: i32| { // Finite difference derivative let h = 1e-5; - let mut x_for = x.clone(); - let mut x_bck = x.clone(); + let mut x_for: Vec = x.to_vec(); + let mut x_bck: Vec = x.to_vec(); for i in 0..d { let mut f1 = 0.; let mut f2 = 0.; @@ -100,12 +130,48 @@ pub fn get_minimum(fn_function: &Function, x: &mut Vec, settings: /// The minimum energy value if the algorithm converges within the /// maximum number of iterations specified in the `settings`. Returns /// `None` if the algorithm does not converge. +/// +/// # Examples +/// +/// ```no_run +/// // Import r-bfgs library +/// use bfgs; +/// use bfgs::settings::{LineSearchAlg, MinimizationAlg}; +/// +/// // Create the settings with default parameters +/// let mut settings: bfgs::settings::Settings = Default::default(); +/// // And eventually change some of the settings +/// settings.minimization = MinimizationAlg::Lbfgs; +/// settings.line_search = LineSearchAlg::Backtracking; +/// +/// // Function to be minimized +/// let function = |x: &[f64], g: &[f64], f: &mut f64, _d: i32| { +/// *f = 0.; +/// for v in x { +/// *f += v * v; +/// } +/// }; +/// +/// // Gradient +/// let gradient = |x: &[f64], g: &mut [f64], f: &f64, d: i32| { +/// for i in 0..d as usize { +/// g[i] = 2. * x[i]; +/// } +/// }; +/// +/// // Set the starting point +/// let mut x = vec![0., -1.]; +/// // Find the minimum +/// let result = bfgs::get_minimum_with_gradient(&function, &gradient, &mut x, &settings); +/// // Check if the result is found +/// assert_ne!(result, None, "Result not found"); +/// ``` #[allow(non_snake_case)] -pub fn get_minimum_with_grad(fn_function: &Function, fn_gradient: &Gradient, x: &mut Vec, settings: &Settings) - -> Option +pub fn get_minimum_with_gradient(fn_function: &Function, fn_gradient: &Gradient, x: &mut Vec, settings: &Settings) + -> Option where - Function: Fn(&Vec, &Vec, &mut f64, i32), - Gradient: Fn(&Vec, &mut Vec, &f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) { do_bfgs(fn_function, fn_gradient, x, settings) } @@ -113,8 +179,8 @@ pub fn get_minimum_with_grad(fn_function: &Function, fn_grad fn do_bfgs(fn_function: &Function, fn_gradient: &Gradient, x: &mut Vec, settings: &Settings) -> Option where - Function: Fn(&Vec, &Vec, &mut f64, i32), - Gradient: Fn(&Vec, &mut Vec, &f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) { // Check value of settings if settings.mu > settings.eta { @@ -124,6 +190,10 @@ fn do_bfgs(fn_function: &Function, fn_gradient: &Gradient, x // Handle different minimization methods match settings.minimization { + MinimizationAlg::GradientDescent => { + use crate::gradient_descent::gradient_descent; + gradient_descent(fn_function, fn_gradient, x, settings) + } MinimizationAlg::Bfgs => { use crate::bfgs::bfgs; bfgs(fn_function, fn_gradient, x, settings) @@ -139,10 +209,7 @@ fn do_bfgs(fn_function: &Function, fn_gradient: &Gradient, x Some(f) => Some(f), None => { use crate::lbfgs::lbfgs; - match lbfgs(fn_function, fn_gradient, x, settings) { - Some(f) => Some(f), - None => None - } + lbfgs(fn_function, fn_gradient, x, settings) } } } diff --git a/src/line_search.rs b/src/line_search.rs index 4370830..b50e3d8 100644 --- a/src/line_search.rs +++ b/src/line_search.rs @@ -2,12 +2,14 @@ use std::mem; use crate::settings::{LineSearchAlg, Settings}; /// Simple line search according to Wolfe's condition -pub(crate) fn line_search_simple(ef: &Ef, gf: &Gf, p: &Vec, x: &mut Vec, x_new: &mut Vec, - g: &mut Vec, f: &mut f64, a: &mut f64, d: i32, k_out: usize, settings: &Settings, eval: &mut usize) - -> bool +#[allow(clippy::too_many_arguments)] +pub(crate) fn line_search_simple(ef: &Function, gf: &Gradient, p: &[f64], x: &mut [f64], + x_new: &mut [f64], g: &mut [f64], f: &mut f64, a: &mut f64, + d: i32, k_out: usize, settings: &Settings, eval: &mut usize) + -> bool where - Ef: Fn(&Vec, &Vec, &mut f64, i32), - Gf: Fn(&Vec, &mut Vec, &f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) { // Import settings let mu = settings.mu; @@ -41,7 +43,7 @@ pub(crate) fn line_search_simple(ef: &Ef, gf: &Gf, p: &Vec, x: &mut break; } - *a = *a * 0.5; + *a *= 0.5; // Try to increase the value of initial a or stop the process if the value of a is too low if *a < 1e-6 { @@ -134,7 +136,7 @@ fn trial_value(l: &Point, t: &Point, u: &Point, bracketed: bool) -> (f64, u32) { if (l.a < u.a && ac <= t.a) || (l.a > u.a && ac >= t.a) { ac = u.a }; - let res = if bracketed { if (ac - t.a).abs() < (ar - t.a).abs() { ac } else { ar } } else { if (ac - t.a).abs() > (ar - t.a).abs() { ac } else { ar } }; + let res = if bracketed { if (ac - t.a).abs() < (ar - t.a).abs() { ac } else { ar } } else if (ac - t.a).abs() > (ar - t.a).abs() { ac } else { ar }; return (res, 3); } @@ -145,11 +147,14 @@ fn trial_value(l: &Point, t: &Point, u: &Point, bracketed: bool) -> (f64, u32) { } /// Update the value of the energy and the gradient in the Point structure -pub(crate) fn update_function(ef: &Ef, gf: &Gf, p: &Vec, x: &Vec, x_new: &mut Vec, - g: &mut Vec, f: &mut f64, a: f64, d: i32, phi: &mut Point, eval: &mut usize) +#[allow(clippy::too_many_arguments)] +pub(crate) fn update_function(ef: &Function, gf: &Gradient, p: &[f64], x: &[f64], + x_new: &mut [f64], g: &mut [f64], f: &mut f64, a: f64, d: i32, + phi: &mut Point, eval: &mut usize) where - Ef: Fn(&Vec, &Vec, &mut f64, i32), - Gf: Fn(&Vec, &mut Vec, &f64, i32) { + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) +{ unsafe { cblas::dcopy(d, x, 1, x_new, 1) }; unsafe { cblas::daxpy(d, a, p, 1, x_new, 1) }; @@ -157,16 +162,19 @@ pub(crate) fn update_function(ef: &Ef, gf: &Gf, p: &Vec, x: &Vec(ef: &Ef, gf: &Gf, phi_s: &Point, p: &Vec, x: &Vec, x_new: &mut Vec, - g: &mut Vec, f: &mut f64, a: &mut f64, d: i32, k_out: usize, settings: &Settings, eval: &mut usize, iter_max: usize) - -> bool +#[allow(clippy::too_many_arguments)] +pub(crate) fn line_search_more_thuente(ef: &Function, gf: &Gradient, phi_s: &Point, p: &[f64], + x: &[f64], x_new: &mut [f64], g: &mut [f64], + f: &mut f64, a: &mut f64, d: i32, k_out: usize, + settings: &Settings, eval: &mut usize, iter_max: usize) + -> bool where - Ef: Fn(&Vec, &Vec, &mut f64, i32), - Gf: Fn(&Vec, &mut Vec, &f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) { // Estimate the value of a from the gradient *a = if k_out > 0 { 1. } else { f64::min(1., 1. / unsafe { cblas::dnrm2(d, g, 1) }) }; @@ -183,12 +191,12 @@ pub(crate) fn line_search_more_thuente(ef: &Ef, gf: &Gf, phi_s: &Point, let mut bracketed = false; // Extract starting point from input - let mut phi_0: Point = (*phi_s).clone(); + let mut phi_0: Point = *phi_s; // Change the sign of the derivative to be consistent with paper notation phi_0.d *= -1.; // Temporary points - let mut phi_l: Point = phi_0.clone(); + let mut phi_l: Point = phi_0; let mut phi_u = Point { a: 0., f: 0., d: 0. }; let mut phi_j = Point { a: 0., f: 0., d: 0. }; @@ -284,19 +292,22 @@ pub(crate) fn line_search_more_thuente(ef: &Ef, gf: &Gf, phi_s: &Point, /* Routine for efficient line-search from * Jorge Nocedal Stephen J. Wright "Numerical Optimization" (2nd Edition) * Algorithm 3.5 (Line Search Algorithm) and Algorithm 3.6 (Zoom) */ -pub(crate) fn line_search_backtracking(ef: &Ef, gf: &Gf, phi_0: &Point, p: &Vec, x: &Vec, x_new: &mut Vec, - g: &mut Vec, f: &mut f64, a: &mut f64, d: i32, settings: &Settings, eval: &mut usize, iter_max: usize) - -> bool +#[allow(clippy::too_many_arguments)] +pub(crate) fn line_search_backtracking(ef: &Function, gf: &Gradient, phi_0: &Point, p: &[f64], + x: &[f64], x_new: &mut [f64], g: &mut [f64], + f: &mut f64, a: &mut f64, d: i32, settings: &Settings, + eval: &mut usize, iter_max: usize) + -> bool where - Ef: Fn(&Vec, &Vec, &mut f64, i32), - Gf: Fn(&Vec, &mut Vec, &f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) { // Import settings let mu = settings.mu; let eta = settings.eta; let init_step = *a; - let mut phi_l: Point = phi_0.clone(); + let mut phi_l: Point = *phi_0; let mut phi_u: Point = Point { a: 0., f: 0., d: 0. }; let mut k = 1; @@ -351,19 +362,22 @@ pub(crate) fn line_search_backtracking(ef: &Ef, gf: &Gf, phi_0: &Point, } *a = init_step; - return false; + + false } -pub(crate) fn line_search(ef: &Ef, gf: &Gf, phi_0: &Point, p: &Vec, x: &mut Vec, x_new: &mut Vec, - g: &mut Vec, f: &mut f64, a: &mut f64, d: i32, k_out: usize, settings: &Settings, eval: &mut usize) - -> bool +#[allow(clippy::too_many_arguments)] +pub(crate) fn line_search(ef: &Function, gf: &Gradient, phi_0: &Point, p: &[f64], + x: &mut [f64], x_new: &mut [f64], g: &mut [f64], f: &mut f64, + a: &mut f64, d: i32, k_out: usize, settings: &Settings, eval: &mut usize) + -> bool where - Ef: Fn(&Vec, &Vec, &mut f64, i32), - Gf: Fn(&Vec, &mut Vec, &f64, i32) + Function: Fn(&[f64], &[f64], &mut f64, i32), + Gradient: Fn(&[f64], &mut [f64], &f64, i32) { match settings.line_search { LineSearchAlg::Simple => { - line_search_simple(&ef, &gf, &p, x, x_new, g, f, a, d, k_out, &settings, eval) + line_search_simple(&ef, &gf, p, x, x_new, g, f, a, d, k_out, settings, eval) } /* Find a according to Wolfe's condition: * - more_thuente: check if this can be used to find a (if yes use that a value) @@ -372,7 +386,7 @@ pub(crate) fn line_search(ef: &Ef, gf: &Gf, phi_0: &Point, p: &Vec, */ LineSearchAlg::Backtracking => { line_search_more_thuente(&ef, &gf, phi_0, p, x, x_new, g, f, a, d, k_out, settings, eval, 10) || - line_search_backtracking(&ef, &gf, phi_0, &p, x, x_new, g, f, a, d, &settings, eval, 30) + line_search_backtracking(&ef, &gf, phi_0, p, x, x_new, g, f, a, d, settings, eval, 30) } } } diff --git a/src/log.rs b/src/log.rs index 5353f8d..bcac28b 100644 --- a/src/log.rs +++ b/src/log.rs @@ -1,4 +1,6 @@ -pub(crate) fn print_log(x: &Vec, g: &Vec, p: &Vec, y: &Vec, s: &Vec, f: f64, f_old: f64, k: usize, a: f64, d: i32, eval: usize) { +#[allow(clippy::too_many_arguments)] +pub(crate) fn print_log(x: &[f64], g: &[f64], p: &[f64], y: &[f64], s: &[f64], f: f64, f_old: f64, k: usize, a: f64, + d: i32, eval: usize) { println!("--- Iteration {k}"); println!(" Function evaluations : {}", eval); println!(" Exit condition ||g||/max(1,||x||) : {}", diff --git a/src/settings.rs b/src/settings.rs index 9688001..6950bc2 100644 --- a/src/settings.rs +++ b/src/settings.rs @@ -1,6 +1,10 @@ /// Enumerator for minimization algorithm pub enum MinimizationAlg { + /// Simple gradient descent algorithm (slow) + GradientDescent, + /// BFGS algorithm Bfgs, + /// L-BFGS algorithm (memory efficient implementation of BFGS) Lbfgs, /// Use L-BFGS algorithm if BFGS fails BfgsBackup, diff --git a/tests/test_functions.rs b/tests/test_functions.rs index 362293b..8e06e6e 100644 --- a/tests/test_functions.rs +++ b/tests/test_functions.rs @@ -1,17 +1,26 @@ -// allow(unused) flag to avoid compiler warning +// Minimization functions from https://www.sfu.ca/~ssurjano/optimization.html +// Note: allow(unused) flag to avoid compiler warning #[allow(unused)] // Global minimum: [0., 0.., ...] -pub fn sphere(x: &Vec, _g: &Vec, f: &mut f64, d: i32) { +pub fn sphere(x: &[f64], _g: &[f64], f: &mut f64, _d: i32) { *f = 0.; + for v in x { + *f += v * v; + } +} + +#[allow(unused)] +// Global minimum: [0., 0.., ...] +pub fn sphere_gradient(x: &[f64], g: &mut [f64], f: &f64, d: i32) { for i in 0..d as usize { - *f += x[i] * x[i]; + g[i] = 2. * x[i]; } } #[allow(unused)] // Global minimum: [1., 1., ...] -pub fn rosenbrock(x: &Vec, _g: &Vec, f: &mut f64, d: i32) { +pub fn rosenbrock(x: &[f64], _g: &[f64], f: &mut f64, d: i32) { *f = 0.; for i in 0..(d - 1) as usize { let t1 = x[i + 1] - x[i] * x[i]; @@ -22,37 +31,37 @@ pub fn rosenbrock(x: &Vec, _g: &Vec, f: &mut f64, d: i32) { #[allow(unused)] // Global minima: [-2.805118, 3.131312] [3.,2.] [-3.779310, -3.283186] [3.584428, -1.848126] -pub fn himmelblau(x: &Vec, _g: &Vec, f: &mut f64, _d: i32) { +pub fn himmelblau(x: &[f64], _g: &[f64], f: &mut f64, _d: i32) { *f = (x[0] * x[0] + x[1] - 11.) * (x[0] * x[0] + x[1] - 11.) + (x[0] + x[1] * x[1] - 7.) * (x[0] + x[1] * x[1] - 7.); } #[allow(unused)] // Global minimum: [0., 0.] // Local minimum: [-1.74755, 0.873776] -pub fn three_hump_camel(x: &Vec, _g: &Vec, f: &mut f64, _d: i32) { +pub fn three_hump_camel(x: &[f64], _g: &[f64], f: &mut f64, _d: i32) { *f = 2. * x[0] * x[0] - 1.05 * x[0] * x[0] * x[0] * x[0] + x[0] * x[0] * x[0] * x[0] * x[0] * x[0] / 6. + x[0] * x[1] + x[1] * x[1]; } #[allow(unused)] // Global minimum: [-0.54719, -1.54719] -pub fn mccormick(x: &Vec, _g: &Vec, f: &mut f64, _d: i32) { +pub fn mccormick(x: &[f64], _g: &[f64], f: &mut f64, _d: i32) { *f = f64::sin(x[0] + x[1]) + (x[0] - x[1]) * (x[0] - x[1]) - 1.5 * x[0] + 2.5 * x[1] + 1.; } #[allow(unused)] // Global minimum: [-2.903524, -2.903524, ...] // Local minima with 2.7468 on any of the elements of the resulting vector, e.g. [-2.903534, 2.7468] -pub fn styblinki_tang(x: &Vec, _g: &Vec, f: &mut f64, d: i32) { +pub fn styblinki_tang(x: &[f64], _g: &[f64], f: &mut f64, _d: i32) { *f = 0.; - for i in 0..(d as usize) { - *f += x[i] * x[i] * x[i] * x[i] - 16. * x[i] * x[i] + 5. * x[i]; + for v in x { + *f += v * v * v * v - 16. * v * v + 5. * v; } *f *= 0.5; } #[allow(unused)] // Global minimum: [3., 0.5] -pub fn beale(x: &Vec, _g: &Vec, f: &mut f64, _d: i32) { +pub fn beale(x: &[f64], _g: &[f64], f: &mut f64, _d: i32) { *f = (1.5 - x[0] + x[0] * x[1]) * (1.5 - x[0] + x[0] * x[1]) + (2.25 - x[0] + x[0] * x[1] * x[1]) * (2.25 - x[0] + x[0] * x[1] * x[1]) + (2.625 - x[0] + x[0] * x[1] * x[1] * x[1]) * (2.625 - x[0] + x[0] * x[1] * x[1] * x[1]); @@ -61,7 +70,7 @@ pub fn beale(x: &Vec, _g: &Vec, f: &mut f64, _d: i32) { #[allow(unused)] // Global minimum: [0., -1.] // Local minima: [1.2, 0.8] [1.8, 0.2] [-0.6, -0.4] -pub fn goldstein_price(r: &Vec, _g: &Vec, f: &mut f64, _d: i32) { +pub fn goldstein_price(r: &[f64], _g: &[f64], f: &mut f64, _d: i32) { let x = r[0]; let y = r[1]; *f = (1. + (x + y + 1.) * (x + y + 1.) * (19. - 14. * x + 3. * x * x - 14. * y + 6. * x * y + 3. * y * y)) * @@ -70,7 +79,7 @@ pub fn goldstein_price(r: &Vec, _g: &Vec, f: &mut f64, _d: i32) { #[allow(unused)] // Global minimum: [0., -1.] -pub fn booth(r: &Vec, _g: &Vec, f: &mut f64, _d: i32) { +pub fn booth(r: &[f64], _g: &[f64], f: &mut f64, _d: i32) { let x = r[0]; let y = r[1]; *f = (x + 2. * y - 7.) * (x + 2. * y - 7.) + (2. * x + y - 5.) * (2. * x + y - 5.); @@ -78,8 +87,8 @@ pub fn booth(r: &Vec, _g: &Vec, f: &mut f64, _d: i32) { #[allow(unused)] // Global minimum: [0., 0.] -pub fn matyas(r: &Vec, _g: &Vec, f: &mut f64, _d: i32) { +pub fn matyas(r: &[f64], _g: &[f64], f: &mut f64, _d: i32) { let x = r[0]; let y = r[1]; - *f = 0.26 * (x *x + y*y) - 0.48*x*y; + *f = 0.26 * (x * x + y * y) - 0.48 * x * y; } diff --git a/tests/test_gradient_descent.rs b/tests/test_gradient_descent.rs new file mode 100644 index 0000000..cc19f19 --- /dev/null +++ b/tests/test_gradient_descent.rs @@ -0,0 +1,26 @@ +use rand::{Rng, thread_rng}; +use bfgs::settings::MinimizationAlg; + +mod test_functions; +mod test_utils; + +#[test] +fn test_sphere_function() { + use bfgs; + + // Create settings with default parameters + let mut settings: bfgs::settings::Settings = Default::default(); + // Select the minimization algorithm + settings.minimization = MinimizationAlg::GradientDescent; + settings.iter_max = 1000; + + let dims = vec![2, 6, 20, 100, 1000]; + + for d in dims { + let mut x = vec![(); d].into_iter().map(|_| thread_rng().gen_range(-10.0..10.0)).collect(); + let result = bfgs::get_minimum(&test_functions::sphere, &mut x, &settings); + assert_ne!(result, None, "Result not found"); + let cmp = vec![0.; d]; + test_utils::check_result(x, cmp); + } +} diff --git a/tests/tests_bfgs.rs b/tests/tests_bfgs.rs index bd5149e..18fbf26 100644 --- a/tests/tests_bfgs.rs +++ b/tests/tests_bfgs.rs @@ -24,6 +24,27 @@ fn test_sphere_function() { } } +#[test] +fn test_sphere_function_with_gradient() { + use bfgs; + + // Create settings with default parameters + let mut settings: bfgs::settings::Settings = Default::default(); + // Select the minimization algorithm + settings.minimization = MinimizationAlg::Bfgs; + + let dims = vec![2, 6, 20, 100, 1000]; + + for d in dims { + let mut x = vec![(); d].into_iter().map(|_| thread_rng().gen_range(-10.0..10.0)).collect(); + let result = bfgs::get_minimum_with_gradient(&test_functions::sphere, + &test_functions::sphere_gradient, &mut x, &settings); + assert_ne!(result, None, "Result not found"); + let cmp = vec![0.; d]; + test_utils::check_result(x, cmp); + } +} + #[test] fn test_rosenbrock_function() { use bfgs;