From 5a6f0b9177c9fbf4e809692c570c18f61d789c91 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Wed, 17 Jul 2024 09:55:31 +0200 Subject: [PATCH 1/4] Switch from AbstractDifferentiation to DifferentiationInterface --- Project.toml | 6 ++- README.md | 2 +- benchmark/benchmarks.jl | 48 +++++++++---------- docs/Project.toml | 2 +- docs/src/examples/sparse_linear_regression.jl | 10 ++-- docs/src/guide/custom_objectives.jl | 47 +++++++++--------- docs/src/guide/getting_started.jl | 11 ++--- docs/src/index.md | 2 +- src/ProximalAlgorithms.jl | 26 +++++----- src/algorithms/davis_yin.jl | 7 ++- src/algorithms/fast_forward_backward.jl | 7 ++- src/algorithms/forward_backward.jl | 7 ++- src/algorithms/li_lin.jl | 10 ++-- src/algorithms/panoc.jl | 15 +++--- src/algorithms/panocplus.jl | 15 +++--- src/algorithms/primal_dual.jl | 8 ++-- src/algorithms/sfista.jl | 7 ++- src/algorithms/zerofpr.jl | 11 ++--- src/utilities/fb_tools.jl | 36 ++++++-------- test/Project.toml | 2 +- test/problems/test_elasticnet.jl | 8 ++-- test/problems/test_equivalence.jl | 6 +-- test/problems/test_lasso_small.jl | 6 +-- .../test_lasso_small_strongly_convex.jl | 4 +- test/problems/test_linear_programs.jl | 8 ++-- test/problems/test_nonconvex_qp.jl | 6 +-- test/problems/test_sparse_logistic_small.jl | 6 +-- test/problems/test_verbose.jl | 6 +-- test/runtests.jl | 8 ++-- test/utilities/test_ad.jl | 10 ++-- test/utilities/test_fb_tools.jl | 2 +- 31 files changed, 167 insertions(+), 182 deletions(-) diff --git a/Project.toml b/Project.toml index e8e7aab..41a6248 100644 --- a/Project.toml +++ b/Project.toml @@ -3,13 +3,15 @@ uuid = "140ffc9f-1907-541a-a177-7475e0a401e9" version = "0.6.0" [deps] -AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" +ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b" [compat] -AbstractDifferentiation = "0.6" +ADTypes = "1.5.3" +DifferentiationInterface = "0.5.8" LinearAlgebra = "1.2" Printf = "1.2" ProximalCore = "0.1" diff --git a/README.md b/README.md index 1ce9dd2..c52c6a3 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Implemented algorithms include: Check out [this section](https://juliafirstorder.github.io/ProximalAlgorithms.jl/stable/guide/implemented_algorithms/) for an overview of the available algorithms. Algorithms rely on: -- [AbstractDifferentiation.jl](https://github.com/JuliaDiff/AbstractDifferentiation.jl) for automatic differentiation (but you can easily bring your own gradients) +- [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl) for automatic differentiation (but you can easily bring your own gradients) - the [ProximalCore API](https://github.com/JuliaFirstOrder/ProximalCore.jl) for proximal mappings, projections, etc, to handle non-differentiable terms (see for example [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl) for an extensive collection of functions). 
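For orientation before diving into the diff: under the new setup, both routes named above coexist. A minimal sketch of each (not part of the patch; the `MyLeastSquares` type and the random data are purely illustrative), showing that a custom gradient method now returns the gradient itself rather than a closure:

```julia
using LinearAlgebra
using ProximalAlgorithms
using Zygote
using DifferentiationInterface: AutoZygote

A, b = randn(5, 3), randn(5)

# Route 1: wrap the function so DifferentiationInterface drives the chosen AD backend.
f_ad = ProximalAlgorithms.AutoDifferentiable(x -> norm(A * x - b)^2 / 2, AutoZygote())

# Route 2: bring your own gradient by adding a method to `value_and_gradient`.
struct MyLeastSquares{TA,Tb}
    A::TA
    b::Tb
end

(f::MyLeastSquares)(x) = norm(f.A * x - f.b)^2 / 2

function ProximalAlgorithms.value_and_gradient(f::MyLeastSquares, x)
    res = f.A * x - f.b
    return norm(res)^2 / 2, f.A' * res  # value and gradient of x -> ||Ax - b||^2 / 2
end

f_manual = MyLeastSquares(A, b)
```

Either object can then be passed as the smooth term `f` of any solver in the package.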
## Documentation diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 035b35d..27032f2 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -8,12 +8,12 @@ using FileIO const SUITE = BenchmarkGroup() -function ProximalAlgorithms.value_and_gradient_closure( +function ProximalAlgorithms.value_and_gradient( f::ProximalOperators.LeastSquaresDirect, x, ) res = f.A * x - f.b - norm(res)^2, () -> f.A' * res + norm(res)^2, f.A' * res end struct SquaredDistance{Tb} @@ -22,9 +22,9 @@ end (f::SquaredDistance)(x) = norm(x - f.b)^2 -function ProximalAlgorithms.value_and_gradient_closure(f::SquaredDistance, x) +function ProximalAlgorithms.value_and_gradient(f::SquaredDistance, x) diff = x - f.b - norm(diff)^2, () -> diff + norm(diff)^2, diff end for (benchmark_name, file_name) in [ @@ -45,56 +45,56 @@ for (benchmark_name, file_name) in [ m, n = size(A) SUITE[k]["ForwardBackward"] = - @benchmarkable solver(x0 = x0, f = f, g = g) setup = begin - solver = ProximalAlgorithms.ForwardBackward(tol = 1e-6) + @benchmarkable solver(x0=x0, f=f, g=g) setup = begin + solver = ProximalAlgorithms.ForwardBackward(tol=1e-6) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) g = NormL1($lam) end SUITE[k]["FastForwardBackward"] = - @benchmarkable solver(x0 = x0, f = f, g = g) setup = begin - solver = ProximalAlgorithms.FastForwardBackward(tol = 1e-6) + @benchmarkable solver(x0=x0, f=f, g=g) setup = begin + solver = ProximalAlgorithms.FastForwardBackward(tol=1e-6) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) g = NormL1($lam) end SUITE[k]["ZeroFPR"] = - @benchmarkable solver(x0 = x0, f = f, A = $A, g = g) setup = begin - solver = ProximalAlgorithms.ZeroFPR(tol = 1e-6) + @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup = begin + solver = ProximalAlgorithms.ZeroFPR(tol=1e-6) x0 = zeros($T, size($A, 2)) f = SquaredDistance($b) g = NormL1($lam) end SUITE[k]["PANOC"] = - @benchmarkable solver(x0 = x0, f = f, A = $A, g = g) setup = begin - solver = ProximalAlgorithms.PANOC(tol = 1e-6) + @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup = begin + solver = ProximalAlgorithms.PANOC(tol=1e-6) x0 = zeros($T, size($A, 2)) f = SquaredDistance($b) g = NormL1($lam) end SUITE[k]["PANOCplus"] = - @benchmarkable solver(x0 = x0, f = f, A = $A, g = g) setup = begin - solver = ProximalAlgorithms.PANOCplus(tol = 1e-6) + @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup = begin + solver = ProximalAlgorithms.PANOCplus(tol=1e-6) x0 = zeros($T, size($A, 2)) f = SquaredDistance($b) g = NormL1($lam) end SUITE[k]["DouglasRachford"] = - @benchmarkable solver(x0 = x0, f = f, g = g, gamma = $R(1)) setup = begin - solver = ProximalAlgorithms.DouglasRachford(tol = 1e-6) + @benchmarkable solver(x0=x0, f=f, g=g, gamma=$R(1)) setup = begin + solver = ProximalAlgorithms.DouglasRachford(tol=1e-6) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) g = NormL1($lam) end SUITE[k]["DRLS"] = - @benchmarkable solver(x0 = x0, f = f, g = g, Lf = Lf) setup = begin - solver = ProximalAlgorithms.DRLS(tol = 1e-6) + @benchmarkable solver(x0=x0, f=f, g=g, Lf=Lf) setup = begin + solver = ProximalAlgorithms.DRLS(tol=1e-6) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) Lf = opnorm(($A)' * $A) @@ -102,11 +102,11 @@ for (benchmark_name, file_name) in [ end SUITE[k]["AFBA-1"] = - @benchmarkable solver(x0 = x0, y0 = y0, f = f, g = g, beta_f = beta_f) setup = + @benchmarkable solver(x0=x0, y0=y0, f=f, g=g, beta_f=beta_f) setup = begin beta_f = opnorm($A)^2 solver = - ProximalAlgorithms.AFBA(theta = $R(1), mu = $R(1), tol = 
$R(1e-6)) + ProximalAlgorithms.AFBA(theta=$R(1), mu=$R(1), tol=$R(1e-6)) x0 = zeros($T, size($A, 2)) y0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) @@ -114,10 +114,10 @@ for (benchmark_name, file_name) in [ end SUITE[k]["AFBA-2"] = - @benchmarkable solver(x0 = x0, y0 = y0, h = h, L = $A, g = g) setup = begin + @benchmarkable solver(x0=x0, y0=y0, h=h, L=$A, g=g) setup = begin beta_f = opnorm($A)^2 solver = - ProximalAlgorithms.AFBA(theta = $R(1), mu = $R(1), tol = $R(1e-6)) + ProximalAlgorithms.AFBA(theta=$R(1), mu=$R(1), tol=$R(1e-6)) x0 = zeros($T, size($A, 2)) y0 = zeros($T, size($A, 1)) h = Translate(SqrNormL2(), -$b) @@ -125,8 +125,8 @@ for (benchmark_name, file_name) in [ end SUITE[k]["SFISTA"] = - @benchmarkable solver(x0 = x0, f = f, Lf = Lf, g = g) setup = begin - solver = ProximalAlgorithms.SFISTA(tol = $R(1e-3)) + @benchmarkable solver(x0=x0, f=f, Lf=Lf, g=g) setup = begin + solver = ProximalAlgorithms.SFISTA(tol=$R(1e-3)) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) g = NormL1($lam) diff --git a/docs/Project.toml b/docs/Project.toml index f38368c..64d2acd 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,5 @@ [deps] -AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" diff --git a/docs/src/examples/sparse_linear_regression.jl b/docs/src/examples/sparse_linear_regression.jl index 771a596..8952a86 100644 --- a/docs/src/examples/sparse_linear_regression.jl +++ b/docs/src/examples/sparse_linear_regression.jl @@ -35,8 +35,8 @@ n_training, n_features = size(training_input) using LinearAlgebra using Statistics -input_loc = mean(training_input, dims = 1) |> vec -input_scale = std(training_input, dims = 1) |> vec +input_loc = mean(training_input, dims=1) |> vec +input_scale = std(training_input, dims=1) |> vec linear_model(wb, input) = input * wb[1:end-1] .+ wb[end] @@ -53,12 +53,12 @@ end mean_squared_error(label, output) = mean((output .- label) .^ 2) / 2 using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalAlgorithms training_loss = ProximalAlgorithms.AutoDifferentiable( wb -> mean_squared_error(training_label, standardized_linear_model(wb, training_input)), - ZygoteBackend(), + AutoZygote(), ) # As regularization we will use the L1 norm, implemented in [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl): @@ -73,7 +73,7 @@ reg = ProximalOperators.NormL1(1) # and the objective terms `f=training_loss` (smooth) and `g=reg` (non smooth). ffb = ProximalAlgorithms.FastForwardBackward() -solution, iterations = ffb(x0 = zeros(n_features + 1), f = training_loss, g = reg) +solution, iterations = ffb(x0=zeros(n_features + 1), f=training_loss, g=reg) # We can now check how well the trained model performs on the test portion of our data. diff --git a/docs/src/guide/custom_objectives.jl b/docs/src/guide/custom_objectives.jl index 95adc32..cf61cc8 100644 --- a/docs/src/guide/custom_objectives.jl +++ b/docs/src/guide/custom_objectives.jl @@ -12,18 +12,18 @@ # # Defining the proximal mapping for a custom function type requires adding a method for [`ProximalCore.prox!`](@ref). 
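As a concrete illustration of the sentence above (not part of the patch: ProximalOperators already provides `NormL1`, so the `MyNormL1` type below is purely didactic and assumes real-valued arguments), a `prox!` method writes the proximal point into `y` and returns the function value at that point:

```julia
using ProximalCore

struct MyNormL1{R}
    lambda::R
end

(f::MyNormL1)(x) = f.lambda * sum(abs, x)

# prox_{gamma*f}(x) for f = lambda * ||.||_1 is componentwise soft-thresholding at gamma * lambda.
function ProximalCore.prox!(y, f::MyNormL1, x, gamma)
    t = gamma * f.lambda
    y .= sign.(x) .* max.(abs.(x) .- t, zero(t))
    return f(y)
end
```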
# -# To compute gradients, algorithms use [`value_and_gradient_closure`](@ref): -# this relies on [AbstractDifferentiation](https://github.com/JuliaDiff/AbstractDifferentiation.jl), for automatic differentiation +# To compute gradients, algorithms use [`value_and_gradient`](@ref): +# this relies on [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl), for automatic differentiation # with any of its supported backends, when functions are wrapped in [`AutoDifferentiable`](@ref), # as the examples below show. # # If however you would like to provide your own gradient implementation (e.g. for efficiency reasons), -# you can simply implement a method for [`value_and_gradient_closure`](@ref) on your own function type. +# you can simply implement a method for [`value_and_gradient`](@ref) on your own function type. # # ```@docs # ProximalCore.prox # ProximalCore.prox! -# ProximalAlgorithms.value_and_gradient_closure +# ProximalAlgorithms.value_and_gradient # ProximalAlgorithms.AutoDifferentiable # ``` # @@ -32,12 +32,12 @@ # Let's try to minimize the celebrated Rosenbrock function, but constrained to the unit norm ball. The cost function is using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalAlgorithms rosenbrock2D = ProximalAlgorithms.AutoDifferentiable( x -> 100 * (x[2] - x[1]^2)^2 + (1 - x[1])^2, - ZygoteBackend(), + AutoZygote(), ) # To enforce the constraint, we define the indicator of the unit ball, together with its proximal mapping: @@ -63,7 +63,7 @@ end # We can now minimize the function, for which we will use [`PANOC`](@ref), which is a Newton-type method: panoc = ProximalAlgorithms.PANOC() -solution, iterations = panoc(x0 = -ones(2), f = rosenbrock2D, g = IndUnitBall()) +solution, iterations = panoc(x0=-ones(2), f=rosenbrock2D, g=IndUnitBall()) # Plotting the solution against the cost function contour and constraint, gives an idea of its correctness. 
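A quick numerical sanity check is also possible (illustrative, not part of the patch; it reuses `rosenbrock2D` and `solution` from the code above together with the new `value_and_gradient`): since the unconstrained minimizer of the Rosenbrock function lies outside the unit ball, the computed point should sit on the boundary with the negative gradient parallel to the outward normal, which for the unit ball is the point itself.

```julia
using LinearAlgebra

_, grad = ProximalAlgorithms.value_and_gradient(rosenbrock2D, solution)
norm(solution)  # should be numerically equal to 1: the constraint is active
# cosine between -grad and the outward normal; close to 1 at a stationary boundary point
dot(-grad, solution) / (norm(grad) * norm(solution))
```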
@@ -73,17 +73,17 @@ contour( -2:0.1:2, -2:0.1:2, (x, y) -> rosenbrock2D([x, y]), - fill = true, - framestyle = :none, - background = nothing, + fill=true, + framestyle=:none, + background=nothing, ) -plot!(Shape(cos.(0:0.01:2*pi), sin.(0:0.01:2*pi)), opacity = 0.5, label = "feasible set") +plot!(Shape(cos.(0:0.01:2*pi), sin.(0:0.01:2*pi)), opacity=0.5, label="feasible set") scatter!( [solution[1]], [solution[2]], - color = :red, - markershape = :star5, - label = "computed solution", + color=:red, + markershape=:star5, + label="computed solution", ) # ## Example: counting operations @@ -105,16 +105,17 @@ end Counting(f::T) where {T} = Counting{T}(f, 0, 0, 0) -# Now we only need to intercept any call to [`value_and_gradient_closure`](@ref) and [`prox!`](@ref) and increase counters there: +function (f::Counting)(x) + f.eval_count += 1 + return f.f(x) +end -function ProximalAlgorithms.value_and_gradient_closure(f::Counting, x) +# Now we only need to intercept any call to [`value_and_gradient`](@ref) and [`prox!`](@ref) and increase counters there: + +function ProximalAlgorithms.value_and_gradient(f::Counting, x) f.eval_count += 1 - fx, pb = ProximalAlgorithms.value_and_gradient_closure(f.f, x) - function counting_pullback() - f.gradient_count += 1 - return pb() - end - return fx, counting_pullback + f.gradient_count += 1 + return ProximalAlgorithms.value_and_gradient(f.f, x) end function ProximalCore.prox!(y, f::Counting, x, gamma) @@ -127,7 +128,7 @@ end f = Counting(rosenbrock2D) g = Counting(IndUnitBall()) -solution, iterations = panoc(x0 = -ones(2), f = f, g = g) +solution, iterations = panoc(x0=-ones(2), f=f, g=g) # and check how many operations where actually performed: diff --git a/docs/src/guide/getting_started.jl b/docs/src/guide/getting_started.jl index 2b4c870..71defe7 100644 --- a/docs/src/guide/getting_started.jl +++ b/docs/src/guide/getting_started.jl @@ -20,7 +20,7 @@ # The literature on proximal operators and algorithms is vast: for an overview, one can refer to [Parikh2014](@cite), [Beck2017](@cite). # # To evaluate these first-order primitives, in ProximalAlgorithms: -# * ``\nabla f_i`` falls back to using automatic differentiation (as provided by [AbstractDifferentiation](https://github.com/JuliaDiff/AbstractDifferentiation.jl) and all of its backends). +# * ``\nabla f_i`` falls back to using automatic differentiation (as provided by [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl) and all of its backends). # * ``\operatorname{prox}_{f_i}`` relies on the intereface of [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl) (>= 0.15). # Both of the above can be implemented for custom function types, as [documented here](@ref custom_terms). 
# @@ -52,13 +52,13 @@ using LinearAlgebra using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalOperators using ProximalAlgorithms quadratic_cost = ProximalAlgorithms.AutoDifferentiable( x -> dot([3.4 1.2; 1.2 4.5] * x, x) / 2 + dot([-2.3, 9.9], x), - ZygoteBackend(), + AutoZygote(), ) box_indicator = ProximalOperators.IndBox(0, 1) @@ -72,10 +72,9 @@ ffb = ProximalAlgorithms.FastForwardBackward(maxit = 1000, tol = 1e-5, verbose = solution, iterations = ffb(x0 = ones(2), f = quadratic_cost, g = box_indicator) # We can verify the correctness of the solution by checking that the negative gradient is orthogonal to the constraints, pointing outwards: -# for this, we just evaluate the closure `cl` returned as second output of [`value_and_gradient_closure`](@ref). +# for this, we just evaluate the second output of [`value_and_gradient`](@ref). -v, cl = ProximalAlgorithms.value_and_gradient_closure(quadratic_cost, solution) --cl() +last(ProximalAlgorithms.value_and_gradient(quadratic_cost, solution)) # Or by plotting the solution against the cost function and constraint: diff --git a/docs/src/index.md b/docs/src/index.md index ef8c44a..ef7354d 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -14,7 +14,7 @@ Implemented algorithms include: Check out [this section](@ref problems_algorithms) for an overview of the available algorithms. Algorithms rely on: -- [AbstractDifferentiation.jl](https://github.com/JuliaDiff/AbstractDifferentiation.jl) for automatic differentiation (but you can easily bring your own gradients), +- [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl) for automatic differentiation (but you can easily bring your own gradients), - the [ProximalCore API](https://github.com/JuliaFirstOrder/ProximalCore.jl) for proximal mappings, projections, etc, to handle non-differentiable terms (see for example [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl) for an extensive collection of functions). !!! note diff --git a/src/ProximalAlgorithms.jl b/src/ProximalAlgorithms.jl index fb43534..e248613 100644 --- a/src/ProximalAlgorithms.jl +++ b/src/ProximalAlgorithms.jl @@ -1,6 +1,7 @@ module ProximalAlgorithms -using AbstractDifferentiation +using ADTypes: ADTypes +using DifferentiationInterface: DifferentiationInterface using ProximalCore using ProximalCore: prox, prox! @@ -12,11 +13,11 @@ const Maybe{T} = Union{T,Nothing} Callable struct wrapping function `f` to be auto-differentiated using `backend`. -When called, it evaluates the same as `f`, while [`value_and_gradient_closure`](@ref) +When called, it evaluates the same as `f`, while its gradient is implemented using `backend` for automatic differentiation. -The backend can be any from [AbstractDifferentiation](https://github.com/JuliaDiff/AbstractDifferentiation.jl). +The backend can be any of those supported by [DifferentiationInterface.jl](https://github.com/gdalle/DifferentiationInterface.jl). """ -struct AutoDifferentiable{F,B} +struct AutoDifferentiable{F,B<:ADTypes.AbstractADType} f::F backend::B end @@ -24,21 +25,18 @@ end (f::AutoDifferentiable)(x) = f.f(x) """ - value_and_gradient_closure(f, x) + value_and_gradient(f, x) -Return a tuple containing the value of `f` at `x`, and a closure `cl`. - -Function `cl`, once called, yields the gradient of `f` at `x`. +Return a tuple containing the value of `f` at `x` and the gradient of `f` at `x`. 
""" -value_and_gradient_closure +value_and_gradient -function value_and_gradient_closure(f::AutoDifferentiable, x) - fx, pb = AbstractDifferentiation.value_and_pullback_function(f.backend, f.f, x) - return fx, () -> pb(one(fx))[1] +function value_and_gradient(f::AutoDifferentiable, x) + return DifferentiationInterface.value_and_gradient(f.f, f.backend, x) end -function value_and_gradient_closure(f::ProximalCore.Zero, x) - f(x), () -> zero(x) +function value_and_gradient(f::ProximalCore.Zero, x) + return f(x), zero(x) end # various utilities diff --git a/src/algorithms/davis_yin.jl b/src/algorithms/davis_yin.jl index 1c633ee..55a4c79 100644 --- a/src/algorithms/davis_yin.jl +++ b/src/algorithms/davis_yin.jl @@ -56,8 +56,7 @@ end function Base.iterate(iter::DavisYinIteration) z = copy(iter.x0) xg, = prox(iter.g, z, iter.gamma) - f_xg, cl = value_and_gradient_closure(iter.f, xg) - grad_f_xg = cl() + f_xg, grad_f_xg = value_and_gradient(iter.f, xg) z_half = 2 .* xg .- z .- iter.gamma .* grad_f_xg xh, = prox(iter.h, z_half, iter.gamma) res = xh - xg @@ -68,8 +67,8 @@ end function Base.iterate(iter::DavisYinIteration, state::DavisYinState) prox!(state.xg, iter.g, state.z, iter.gamma) - f_xg, cl = value_and_gradient_closure(iter.f, state.xg) - state.grad_f_xg .= cl() + f_xg, grad_f_xg = value_and_gradient(iter.f, state.xg) + state.grad_f_xg .= grad_f_xg state.z_half .= 2 .* state.xg .- state.z .- iter.gamma .* state.grad_f_xg prox!(state.xh, iter.h, state.z_half, iter.gamma) state.res .= state.xh .- state.xg diff --git a/src/algorithms/fast_forward_backward.jl b/src/algorithms/fast_forward_backward.jl index adf4c64..c4ccb3f 100644 --- a/src/algorithms/fast_forward_backward.jl +++ b/src/algorithms/fast_forward_backward.jl @@ -72,8 +72,7 @@ end function Base.iterate(iter::FastForwardBackwardIteration) x = copy(iter.x0) - f_x, cl = value_and_gradient_closure(iter.f, x) - grad_f_x = cl() + f_x, grad_f_x = value_and_gradient(iter.f, x) gamma = iter.gamma === nothing ? 1 / lower_bound_smoothness_constant(iter.f, I, x, grad_f_x) : iter.gamma @@ -136,8 +135,8 @@ function Base.iterate( state.x .= state.z .+ beta .* (state.z .- state.z_prev) state.z_prev, state.z = state.z, state.z_prev - state.f_x, cl = value_and_gradient_closure(iter.f, state.x) - state.grad_f_x .= cl() + state.f_x, grad_f_x = value_and_gradient(iter.f, state.x) + state.grad_f_x .= grad_f_x state.y .= state.x .- state.gamma .* state.grad_f_x state.g_z = prox!(state.z, iter.g, state.y, state.gamma) state.res .= state.x .- state.z diff --git a/src/algorithms/forward_backward.jl b/src/algorithms/forward_backward.jl index 2ba7f8c..574389b 100644 --- a/src/algorithms/forward_backward.jl +++ b/src/algorithms/forward_backward.jl @@ -64,8 +64,7 @@ end function Base.iterate(iter::ForwardBackwardIteration) x = copy(iter.x0) - f_x, cl = value_and_gradient_closure(iter.f, x) - grad_f_x = cl() + f_x, grad_f_x = value_and_gradient(iter.f, x) gamma = iter.gamma === nothing ? 
1 / lower_bound_smoothness_constant(iter.f, I, x, grad_f_x) : iter.gamma @@ -111,8 +110,8 @@ function Base.iterate( state.grad_f_x, state.grad_f_z = state.grad_f_z, state.grad_f_x else state.x, state.z = state.z, state.x - state.f_x, cl = value_and_gradient_closure(iter.f, state.x) - state.grad_f_x .= cl() + state.f_x, grad_f_x = value_and_gradient(iter.f, state.x) + state.grad_f_x .= grad_f_x end state.y .= state.x .- state.gamma .* state.grad_f_x diff --git a/src/algorithms/li_lin.jl b/src/algorithms/li_lin.jl index 6fbcde3..0889024 100644 --- a/src/algorithms/li_lin.jl +++ b/src/algorithms/li_lin.jl @@ -62,8 +62,7 @@ end function Base.iterate(iter::LiLinIteration{R}) where {R} y = copy(iter.x0) - f_y, cl = value_and_gradient_closure(iter.f, y) - grad_f_y = cl() + f_y, grad_f_y = value_and_gradient(iter.f, y) # TODO: initialize gamma if not provided # TODO: authors suggest Barzilai-Borwein rule? @@ -110,8 +109,7 @@ function Base.iterate(iter::LiLinIteration{R}, state::LiLinState{R,Tx}) where {R else # TODO: re-use available space in state? # TODO: backtrack gamma at x - f_x, cl = value_and_gradient_closure(iter.f, x) - grad_f_x = cl() + f_x, grad_f_x = value_and_gradient(iter.f, x) x_forward = state.x - state.gamma .* grad_f_x v, g_v = prox(iter.g, x_forward, state.gamma) Fv = iter.f(v) + g_v @@ -130,8 +128,8 @@ function Base.iterate(iter::LiLinIteration{R}, state::LiLinState{R,Tx}) where {R Fx = Fv end - state.f_y, cl = value_and_gradient_closure(iter.f, state.y) - state.grad_f_y .= cl() + state.f_y, grad_f_y = value_and_gradient(iter.f, state.y) + state.grad_f_y .= grad_f_y state.y_forward .= state.y .- state.gamma .* state.grad_f_y state.g_z = prox!(state.z, iter.g, state.y_forward, state.gamma) diff --git a/src/algorithms/panoc.jl b/src/algorithms/panoc.jl index bea282e..c7f2558 100644 --- a/src/algorithms/panoc.jl +++ b/src/algorithms/panoc.jl @@ -87,8 +87,7 @@ f_model(iter::PANOCIteration, state::PANOCState) = function Base.iterate(iter::PANOCIteration{R}) where {R} x = copy(iter.x0) Ax = iter.A * x - f_Ax, cl = value_and_gradient_closure(iter.f, Ax) - grad_f_Ax = cl() + f_Ax, grad_f_Ax = value_and_gradient(iter.f, Ax) gamma = iter.gamma === nothing ? 
iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : @@ -182,8 +181,8 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R state.x_d .= state.x .+ state.d state.Ax_d .= state.Ax .+ state.Ad - state.f_Ax_d, cl = value_and_gradient_closure(iter.f, state.Ax_d) - state.grad_f_Ax_d .= cl() + state.f_Ax_d, grad_f_Ax_d = value_and_gradient(iter.f, state.Ax_d) + state.grad_f_Ax_d .= grad_f_Ax_d mul!(state.At_grad_f_Ax_d, adjoint(iter.A), state.grad_f_Ax_d) copyto!(state.x, state.x_d) @@ -220,8 +219,8 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R # along a line using interpolation and linear combinations # this allows saving operations if isinf(f_Az) - f_Az, cl = value_and_gradient_closure(iter.f, state.Az) - state.grad_f_Az .= cl() + f_Az, grad_f_Az = value_and_gradient(iter.f, state.Az) + state.grad_f_Az .= grad_f_Az end if isinf(c) mul!(state.At_grad_f_Az, iter.A', state.grad_f_Az) @@ -239,8 +238,8 @@ function Base.iterate(iter::PANOCIteration{R,Tx,Tf}, state::PANOCState) where {R else # otherwise, in the general case where f is only smooth, we compute # one gradient and matvec per backtracking step - state.f_Ax, cl = value_and_gradient_closure(iter.f, state.Ax) - state.grad_f_Ax .= cl() + state.f_Ax, grad_f_Ax = value_and_gradient(iter.f, state.Ax) + state.grad_f_Ax .= grad_f_Ax mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) end diff --git a/src/algorithms/panocplus.jl b/src/algorithms/panocplus.jl index 553d0a4..d407039 100644 --- a/src/algorithms/panocplus.jl +++ b/src/algorithms/panocplus.jl @@ -80,8 +80,7 @@ f_model(iter::PANOCplusIteration, state::PANOCplusState) = function Base.iterate(iter::PANOCplusIteration{R}) where {R} x = copy(iter.x0) Ax = iter.A * x - f_Ax, cl = value_and_gradient_closure(iter.f, Ax) - grad_f_Ax = cl() + f_Ax, grad_f_Ax = value_and_gradient(iter.f, Ax) gamma = iter.gamma === nothing ? 
iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : @@ -122,8 +121,8 @@ function Base.iterate(iter::PANOCplusIteration{R}) where {R} ) else mul!(state.Az, iter.A, state.z) - f_Az, cl = value_and_gradient_closure(iter.f, state.Az) - state.grad_f_Az = cl() + f_Az, grad_f_Az = value_and_gradient(iter.f, state.Az) + state.grad_f_Az = grad_f_Az end mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az) return state, state @@ -198,8 +197,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where end mul!(state.Ax, iter.A, state.x) - state.f_Ax, cl = value_and_gradient_closure(iter.f, state.Ax) - state.grad_f_Ax .= cl() + state.f_Ax, grad_f_Ax = value_and_gradient(iter.f, state.Ax) + state.grad_f_Ax .= grad_f_Ax mul!(state.At_grad_f_Ax, adjoint(iter.A), state.grad_f_Ax) state.y .= state.x .- state.gamma .* state.At_grad_f_Ax @@ -209,8 +208,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where f_Az_upp = f_model(iter, state) mul!(state.Az, iter.A, state.z) - f_Az, cl = value_and_gradient_closure(iter.f, state.Az) - state.grad_f_Az .= cl() + f_Az, grad_f_Az = value_and_gradient(iter.f, state.Az) + state.grad_f_Az .= grad_f_Az if (iter.gamma === nothing || iter.adaptive == true) tol = 10 * eps(R) * (1 + abs(f_Az)) if f_Az > f_Az_upp + tol && state.gamma >= iter.minimum_gamma diff --git a/src/algorithms/primal_dual.jl b/src/algorithms/primal_dual.jl index 15c0375..9077da7 100644 --- a/src/algorithms/primal_dual.jl +++ b/src/algorithms/primal_dual.jl @@ -175,8 +175,8 @@ function Base.iterate( state::AFBAState = AFBAState(x = copy(iter.x0), y = copy(iter.y0)), ) # perform xbar-update step - f_x, cl = value_and_gradient_closure(iter.f, state.x) - state.gradf .= cl() + f_x, gradf = value_and_gradient(iter.f, state.x) + state.gradf .= gradf mul!(state.temp_x, iter.L', state.y) state.temp_x .+= state.gradf state.temp_x .*= -iter.gamma[1] @@ -184,8 +184,8 @@ function Base.iterate( prox!(state.xbar, iter.g, state.temp_x, iter.gamma[1]) # perform ybar-update step - lc_y, cl = value_and_gradient_closure(convex_conjugate(iter.l), state.y) - state.gradl .= cl() + lc_y, gradl = value_and_gradient(convex_conjugate(iter.l), state.y) + state.gradl .= gradl state.temp_x .= iter.theta .* state.xbar .+ (1 - iter.theta) .* state.x mul!(state.temp_y, iter.L, state.temp_x) state.temp_y .-= state.gradl diff --git a/src/algorithms/sfista.jl b/src/algorithms/sfista.jl index 1c6c6c6..3e9458e 100644 --- a/src/algorithms/sfista.jl +++ b/src/algorithms/sfista.jl @@ -71,8 +71,8 @@ function Base.iterate( state.a = (state.τ + sqrt(state.τ^2 + 4 * state.τ * state.APrev)) / 2 state.A = state.APrev + state.a state.xt .= (state.APrev / state.A) .* state.yPrev + (state.a / state.A) .* state.xPrev - f_xt, cl = value_and_gradient_closure(iter.f, state.xt) - state.gradf_xt .= cl() + f_xt, gradf_xt = value_and_gradient(iter.f, state.xt) + state.gradf_xt .= gradf_xt λ2 = state.λ / (1 + state.λ * iter.mf) # FISTA acceleration steps. prox!(state.y, iter.g, state.xt - λ2 * state.gradf_xt, λ2) @@ -97,8 +97,7 @@ function check_sc(state::SFISTAState, iter::SFISTAIteration, tol, termination_ty else # Classic (approximate) first-order stationary point [4]. The main inclusion is: r ∈ ∇f(y) + ∂h(y). 
λ2 = state.λ / (1 + state.λ * iter.mf) - f_y, cl = value_and_gradient_closure(iter.f, state.y) - gradf_y = cl() + f_y, gradf_y = value_and_gradient(iter.f, state.y) r = gradf_y - state.gradf_xt + (state.xt - state.y) / λ2 res = norm(r) end diff --git a/src/algorithms/zerofpr.jl b/src/algorithms/zerofpr.jl index cf49257..8830969 100644 --- a/src/algorithms/zerofpr.jl +++ b/src/algorithms/zerofpr.jl @@ -85,8 +85,7 @@ f_model(iter::ZeroFPRIteration, state::ZeroFPRState) = function Base.iterate(iter::ZeroFPRIteration{R}) where {R} x = copy(iter.x0) Ax = iter.A * x - f_Ax, cl = value_and_gradient_closure(iter.f, Ax) - grad_f_Ax = cl() + f_Ax, grad_f_Ax = value_and_gradient(iter.f, Ax) gamma = iter.gamma === nothing ? iter.alpha / lower_bound_smoothness_constant(iter.f, iter.A, x, grad_f_Ax) : @@ -166,8 +165,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R} f_Axbar_upp, f_Axbar else mul!(state.Axbar, iter.A, state.xbar) - f_Axbar, cl = value_and_gradient_closure(iter.f, state.Axbar) - state.grad_f_Axbar .= cl() + f_Axbar, grad_f_Axbar = value_and_gradient(iter.f, state.Axbar) + state.grad_f_Axbar .= grad_f_Axbar f_model(iter, state), f_Axbar end @@ -202,8 +201,8 @@ function Base.iterate(iter::ZeroFPRIteration{R}, state::ZeroFPRState) where {R} state.x .= state.xbar_prev .+ state.tau .* state.d state.Ax .= state.Axbar .+ state.tau .* state.Ad # TODO: can precompute most of next line in case f is quadratic - state.f_Ax, cl = value_and_gradient_closure(iter.f, state.Ax) - state.grad_f_Ax .= cl() + state.f_Ax, grad_f_Ax = value_and_gradient(iter.f, state.Ax) + state.grad_f_Ax .= grad_f_Ax mul!(state.At_grad_f_Ax, iter.A', state.grad_f_Ax) state.y .= state.x .- state.gamma .* state.At_grad_f_Ax state.g_xbar = prox!(state.xbar, iter.g, state.y, state.gamma) diff --git a/src/utilities/fb_tools.jl b/src/utilities/fb_tools.jl index 6ebe71d..0c88c8a 100644 --- a/src/utilities/fb_tools.jl +++ b/src/utilities/fb_tools.jl @@ -7,16 +7,14 @@ end function lower_bound_smoothness_constant(f, A, x, grad_f_Ax) R = real(eltype(x)) xeps = x .+ 1 - f_Axeps, cl = value_and_gradient_closure(f, A * xeps) - grad_f_Axeps = cl() + f_Axeps, grad_f_Axeps = value_and_gradient(f, A * xeps) return norm(A' * (grad_f_Axeps - grad_f_Ax)) / R(sqrt(length(x))) end function lower_bound_smoothness_constant(f, A, x) R = real(eltype(x)) Ax = A * x - f_Ax, cl = value_and_gradient_closure(f, Ax) - grad_f_Ax = cl() + f_Ax, grad_f_Ax = value_and_gradient(f, Ax) return lower_bound_smoothness_constant(f, A, x, grad_f_Ax) end @@ -36,14 +34,14 @@ function backtrack_stepsize!( g_z::R, res, Az, - grad_f_Az = nothing; - alpha = R(1), - minimum_gamma = R(1e-7), - reduce_gamma = R(0.5), + grad_f_Az=nothing; + alpha=R(1), + minimum_gamma=R(1e-7), + reduce_gamma=R(0.5), ) where {R} f_Az_upp = f_model(f_Ax, At_grad_f_Ax, res, alpha / gamma) _mul!(Az, A, z) - f_Az, cl = value_and_gradient_closure(f, Az) + f_Az, grad_f_Az = value_and_gradient(f, Az) tol = 10 * eps(R) * (1 + abs(f_Az)) while f_Az > f_Az_upp + tol && gamma >= minimum_gamma gamma *= reduce_gamma @@ -52,12 +50,9 @@ function backtrack_stepsize!( res .= x .- z f_Az_upp = f_model(f_Ax, At_grad_f_Ax, res, alpha / gamma) _mul!(Az, A, z) - f_Az, cl = value_and_gradient_closure(f, Az) + f_Az, grad_f_Az = value_and_gradient(f, Az) tol = 10 * eps(R) * (1 + abs(f_Az)) end - if grad_f_Az !== nothing - grad_f_Az .= cl() - end if gamma < minimum_gamma @warn "stepsize `gamma` became too small ($(gamma))" end @@ -70,13 +65,12 @@ function backtrack_stepsize!( A, g, x; - 
alpha = R(1), - minimum_gamma = R(1e-7), - reduce_gamma = R(0.5), + alpha=R(1), + minimum_gamma=R(1e-7), + reduce_gamma=R(0.5), ) where {R} Ax = A * x - f_Ax, cl = value_and_gradient_closure(f, Ax) - grad_f_Ax = cl() + f_Ax, grad_f_Ax = value_and_gradient(f, Ax) At_grad_f_Ax = A' * grad_f_Ax y = x - gamma .* At_grad_f_Ax z, g_z = prox(g, y, gamma) @@ -94,8 +88,8 @@ function backtrack_stepsize!( x - z, Ax, grad_f_Ax; - alpha = alpha, - minimum_gamma = minimum_gamma, - reduce_gamma = reduce_gamma, + alpha=alpha, + minimum_gamma=minimum_gamma, + reduce_gamma=reduce_gamma, ) end diff --git a/test/Project.toml b/test/Project.toml index 7241417..004bce7 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,7 +1,7 @@ [deps] -AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" AbstractOperators = "d9c5613a-d543-52d8-9afd-8f241a8c3f1c" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/test/problems/test_elasticnet.jl b/test/problems/test_elasticnet.jl index 3db580a..274d8ea 100644 --- a/test/problems/test_elasticnet.jl +++ b/test/problems/test_elasticnet.jl @@ -2,7 +2,7 @@ using LinearAlgebra using ProximalOperators: NormL1, SqrNormL2, ElasticNet, Translate using ProximalAlgorithms using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote @testset "Elastic net ($T)" for T in [Float32, Float64, ComplexF32, ComplexF64] A = T[ @@ -22,7 +22,7 @@ using AbstractDifferentiation: ZygoteBackend reg2 = SqrNormL2(R(1)) loss = Translate(SqrNormL2(R(1)), -b) cost = - ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, ZygoteBackend()) + ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, AutoZygote()) L = opnorm(A)^2 @@ -68,7 +68,7 @@ using AbstractDifferentiation: ZygoteBackend (x_afba, y_afba), it_afba = solver( x0 = x0, y0 = y0, - f = ProximalAlgorithms.AutoDifferentiable(reg2, ZygoteBackend()), + f = ProximalAlgorithms.AutoDifferentiable(reg2, AutoZygote()), g = reg1, h = loss, L = A, @@ -92,7 +92,7 @@ using AbstractDifferentiation: ZygoteBackend (x_afba, y_afba), it_afba = solver( x0 = x0, y0 = y0, - f = ProximalAlgorithms.AutoDifferentiable(reg2, ZygoteBackend()), + f = ProximalAlgorithms.AutoDifferentiable(reg2, AutoZygote()), g = reg1, h = loss, L = A, diff --git a/test/problems/test_equivalence.jl b/test/problems/test_equivalence.jl index 42d73ad..54c96f7 100644 --- a/test/problems/test_equivalence.jl +++ b/test/problems/test_equivalence.jl @@ -1,7 +1,7 @@ using LinearAlgebra using Test using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalOperators: LeastSquares, NormL1 using ProximalAlgorithms: DouglasRachfordIteration, @@ -63,7 +63,7 @@ end lam = R(0.1) * norm(A' * b, Inf) - f = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, ZygoteBackend()) + f = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, AutoZygote()) g = NormL1(lam) x0 = zeros(R, n) @@ -98,7 +98,7 @@ end lam = R(0.1) * norm(A' * b, Inf) - f = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, ZygoteBackend()) + f = ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, AutoZygote()) g = NormL1(lam) x0 = zeros(R, n) diff --git a/test/problems/test_lasso_small.jl 
b/test/problems/test_lasso_small.jl index 2c2faec..8ffa2df 100644 --- a/test/problems/test_lasso_small.jl +++ b/test/problems/test_lasso_small.jl @@ -2,7 +2,7 @@ using LinearAlgebra using Test using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalOperators: NormL1, LeastSquares using ProximalAlgorithms using ProximalAlgorithms: @@ -30,9 +30,9 @@ using ProximalAlgorithms: @test typeof(lam) == R f_autodiff = - ProximalAlgorithms.AutoDifferentiable(x -> (norm(x - b)^2) / 2, ZygoteBackend()) + ProximalAlgorithms.AutoDifferentiable(x -> (norm(x - b)^2) / 2, AutoZygote()) fA_autodiff = - ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, ZygoteBackend()) + ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, AutoZygote()) f_prox = Translate(SqrNormL2(R(1)), -b) fA_prox = LeastSquares(A, b) g = NormL1(lam) diff --git a/test/problems/test_lasso_small_strongly_convex.jl b/test/problems/test_lasso_small_strongly_convex.jl index 476efec..b282c57 100644 --- a/test/problems/test_lasso_small_strongly_convex.jl +++ b/test/problems/test_lasso_small_strongly_convex.jl @@ -2,7 +2,7 @@ using LinearAlgebra using Test using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalOperators: NormL1, LeastSquares using ProximalAlgorithms @@ -45,7 +45,7 @@ using ProximalAlgorithms fA_prox = LeastSquares(A, b) fA_autodiff = - ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, ZygoteBackend()) + ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, AutoZygote()) g = NormL1(lam) TOL = T(1e-4) diff --git a/test/problems/test_linear_programs.jl b/test/problems/test_linear_programs.jl index d0aea87..54a908a 100644 --- a/test/problems/test_linear_programs.jl +++ b/test/problems/test_linear_programs.jl @@ -1,5 +1,5 @@ using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalOperators: Linear, IndNonnegative, IndPoint, IndAffine, SlicedSeparableSum using ProximalAlgorithms using LinearAlgebra @@ -101,7 +101,7 @@ end @testset "AFBA" begin - f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), ZygoteBackend()) + f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), AutoZygote()) g = IndNonnegative() h = IndPoint(b) @@ -127,7 +127,7 @@ end @testset "VuCondat" begin - f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), ZygoteBackend()) + f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), AutoZygote()) g = IndNonnegative() h = IndPoint(b) @@ -176,7 +176,7 @@ end @testset "DavisYin" begin - f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), ZygoteBackend()) + f = ProximalAlgorithms.AutoDifferentiable(x -> dot(c, x), AutoZygote()) g = IndNonnegative() h = IndAffine(A, b) diff --git a/test/problems/test_nonconvex_qp.jl b/test/problems/test_nonconvex_qp.jl index 60558f2..fadc5f2 100644 --- a/test/problems/test_nonconvex_qp.jl +++ b/test/problems/test_nonconvex_qp.jl @@ -1,5 +1,5 @@ using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalAlgorithms using ProximalOperators: IndBox using LinearAlgebra @@ -14,7 +14,7 @@ using Test f = ProximalAlgorithms.AutoDifferentiable( x -> dot(Q * x, x) / 2 + dot(q, x), - ZygoteBackend(), + AutoZygote(), ) g = IndBox(low, upp) @@ -83,7 +83,7 @@ end f = ProximalAlgorithms.AutoDifferentiable( x -> dot(Q * x, x) / 2 + dot(q, x), - ZygoteBackend(), + AutoZygote(), ) g = 
IndBox(low, upp) diff --git a/test/problems/test_sparse_logistic_small.jl b/test/problems/test_sparse_logistic_small.jl index 8c854e5..5103120 100644 --- a/test/problems/test_sparse_logistic_small.jl +++ b/test/problems/test_sparse_logistic_small.jl @@ -1,5 +1,5 @@ using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalOperators: NormL1 using ProximalAlgorithms using LinearAlgebra @@ -23,10 +23,10 @@ using LinearAlgebra end f_autodiff = - ProximalAlgorithms.AutoDifferentiable(x -> logistic_loss(x - b), ZygoteBackend()) + ProximalAlgorithms.AutoDifferentiable(x -> logistic_loss(x - b), AutoZygote()) fA_autodiff = ProximalAlgorithms.AutoDifferentiable( x -> logistic_loss(A * x - b), - ZygoteBackend(), + AutoZygote(), ) lam = R(0.1) g = NormL1(lam) diff --git a/test/problems/test_verbose.jl b/test/problems/test_verbose.jl index ee3f318..ce2da75 100644 --- a/test/problems/test_verbose.jl +++ b/test/problems/test_verbose.jl @@ -1,5 +1,5 @@ using Zygote -using AbstractDifferentiation: ZygoteBackend +using DifferentiationInterface: AutoZygote using ProximalOperators: LeastSquares, NormL1 using ProximalAlgorithms using LinearAlgebra @@ -21,9 +21,9 @@ using LinearAlgebra @test typeof(lam) == R f_autodiff = - ProximalAlgorithms.AutoDifferentiable(x -> (norm(x - b)^2) / 2, ZygoteBackend()) + ProximalAlgorithms.AutoDifferentiable(x -> (norm(x - b)^2) / 2, AutoZygote()) fA_autodiff = - ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, ZygoteBackend()) + ProximalAlgorithms.AutoDifferentiable(x -> (norm(A * x - b)^2) / 2, AutoZygote()) fA_prox = LeastSquares(A, b) g = NormL1(lam) diff --git a/test/runtests.jl b/test/runtests.jl index 957c189..42b4be9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ using Test using Aqua -using AbstractDifferentiation +using DifferentiationInterface using ProximalAlgorithms struct Quadratic{M,V} @@ -10,13 +10,13 @@ end (f::Quadratic)(x) = dot(x, f.Q * x) / 2 + dot(f.q, x) -function ProximalAlgorithms.value_and_gradient_closure(f::Quadratic, x) +function ProximalAlgorithms.value_and_gradient(f::Quadratic, x) grad = f.Q * x + f.q - return dot(grad, x) / 2 + dot(f.q, x), () -> grad + return dot(grad, x) / 2 + dot(f.q, x), grad end @testset "Aqua" begin - Aqua.test_all(ProximalAlgorithms; ambiguities = false) + Aqua.test_all(ProximalAlgorithms; ambiguities=false) end include("utilities/test_ad.jl") diff --git a/test/utilities/test_ad.jl b/test/utilities/test_ad.jl index e8f9c0b..17a21ff 100644 --- a/test/utilities/test_ad.jl +++ b/test/utilities/test_ad.jl @@ -4,13 +4,13 @@ using ProximalOperators: NormL1 using ProximalAlgorithms using Zygote using ReverseDiff -using AbstractDifferentiation: ZygoteBackend, ReverseDiffBackend +using DifferentiationInterface: AutoZygote, AutoReverseDiff @testset "Autodiff backend ($B on $T)" for (T, B) in Iterators.product( [Float32, Float64, ComplexF32, ComplexF64], - [ZygoteBackend, ReverseDiffBackend], + [AutoZygote, AutoReverseDiff], ) - if T <: Complex && B == ReverseDiffBackend + if T <: Complex && B == AutoReverseDiff continue end @@ -28,8 +28,8 @@ using AbstractDifferentiation: ZygoteBackend, ReverseDiffBackend x0 = zeros(T, n) - f_x0, cl = ProximalAlgorithms.value_and_gradient_closure(f, x0) - grad_f_x0 = @inferred cl() + # TODO: I removed the @inferred below, Zygote can infer the output type of the closure once it has the closure but it cannot infer the whole procedure of computing the gradient anyway + f_x0, grad_f_x0 = 
ProximalAlgorithms.value_and_gradient(f, x0) lam = R(0.1) * norm(A' * b, Inf) @test typeof(lam) == R diff --git a/test/utilities/test_fb_tools.jl b/test/utilities/test_fb_tools.jl index a2c6509..392b20c 100644 --- a/test/utilities/test_fb_tools.jl +++ b/test/utilities/test_fb_tools.jl @@ -2,7 +2,7 @@ using Test using LinearAlgebra using ProximalCore: Zero using ProximalAlgorithms -using AbstractDifferentiation +using DifferentiationInterface @testset "Lipschitz constant estimation" for R in [Float32, Float64] From 92a12049463419baecb404a6f9e44db2fb2ff917 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Wed, 17 Jul 2024 10:08:56 +0200 Subject: [PATCH 2/4] Reduce diff --- benchmark/benchmarks.jl | 40 +++++++++---------- docs/src/examples/sparse_linear_regression.jl | 6 +-- docs/src/guide/custom_objectives.jl | 18 ++++----- src/utilities/fb_tools.jl | 20 +++++----- test/runtests.jl | 2 +- 5 files changed, 43 insertions(+), 43 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 27032f2..7c29af6 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -45,56 +45,56 @@ for (benchmark_name, file_name) in [ m, n = size(A) SUITE[k]["ForwardBackward"] = - @benchmarkable solver(x0=x0, f=f, g=g) setup = begin - solver = ProximalAlgorithms.ForwardBackward(tol=1e-6) + @benchmarkable solver(x0 = x0, f = f, g = g) setup = begin + solver = ProximalAlgorithms.ForwardBackward(tol = 1e-6) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) g = NormL1($lam) end SUITE[k]["FastForwardBackward"] = - @benchmarkable solver(x0=x0, f=f, g=g) setup = begin - solver = ProximalAlgorithms.FastForwardBackward(tol=1e-6) + @benchmarkable solver(x0 = x0, f = f, g = g) setup = begin + solver = ProximalAlgorithms.FastForwardBackward(tol = 1e-6) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) g = NormL1($lam) end SUITE[k]["ZeroFPR"] = - @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup = begin - solver = ProximalAlgorithms.ZeroFPR(tol=1e-6) + @benchmarkable solver(x0 = x0, f = f, A = $A, g = g) setup = begin + solver = ProximalAlgorithms.ZeroFPR(tol = 1e-6) x0 = zeros($T, size($A, 2)) f = SquaredDistance($b) g = NormL1($lam) end SUITE[k]["PANOC"] = - @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup = begin - solver = ProximalAlgorithms.PANOC(tol=1e-6) + @benchmarkable solver(x0 = x0, f = f, A = $A, g = g) setup = begin + solver = ProximalAlgorithms.PANOC(tol = 1e-6) x0 = zeros($T, size($A, 2)) f = SquaredDistance($b) g = NormL1($lam) end SUITE[k]["PANOCplus"] = - @benchmarkable solver(x0=x0, f=f, A=$A, g=g) setup = begin - solver = ProximalAlgorithms.PANOCplus(tol=1e-6) + @benchmarkable solver(x0 = x0, f = f, A = $A, g = g) setup = begin + solver = ProximalAlgorithms.PANOCplus(tol = 1e-6) x0 = zeros($T, size($A, 2)) f = SquaredDistance($b) g = NormL1($lam) end SUITE[k]["DouglasRachford"] = - @benchmarkable solver(x0=x0, f=f, g=g, gamma=$R(1)) setup = begin - solver = ProximalAlgorithms.DouglasRachford(tol=1e-6) + @benchmarkable solver(x0 = x0, f = f, g = g, gamma = $R(1)) setup = begin + solver = ProximalAlgorithms.DouglasRachford(tol = 1e-6) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) g = NormL1($lam) end SUITE[k]["DRLS"] = - @benchmarkable solver(x0=x0, f=f, g=g, Lf=Lf) setup = begin - solver = ProximalAlgorithms.DRLS(tol=1e-6) + @benchmarkable solver(x0 = x0, f = f, g = g, Lf = Lf) setup = begin + solver = ProximalAlgorithms.DRLS(tol = 1e-6) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) Lf = opnorm(($A)' * $A) 
@@ -102,11 +102,11 @@ for (benchmark_name, file_name) in [ end SUITE[k]["AFBA-1"] = - @benchmarkable solver(x0=x0, y0=y0, f=f, g=g, beta_f=beta_f) setup = + @benchmarkable solver(x0 = x0, y0 = y0, f = f, g = g, beta_f = beta_f) setup = begin beta_f = opnorm($A)^2 solver = - ProximalAlgorithms.AFBA(theta=$R(1), mu=$R(1), tol=$R(1e-6)) + ProximalAlgorithms.AFBA(theta = $R(1), mu = $R(1), tol = $R(1e-6)) x0 = zeros($T, size($A, 2)) y0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) @@ -114,10 +114,10 @@ for (benchmark_name, file_name) in [ end SUITE[k]["AFBA-2"] = - @benchmarkable solver(x0=x0, y0=y0, h=h, L=$A, g=g) setup = begin + @benchmarkable solver(x0 = x0, y0 = y0, h = h, L = $A, g = g) setup = begin beta_f = opnorm($A)^2 solver = - ProximalAlgorithms.AFBA(theta=$R(1), mu=$R(1), tol=$R(1e-6)) + ProximalAlgorithms.AFBA(theta = $R(1), mu = $R(1), tol = $R(1e-6)) x0 = zeros($T, size($A, 2)) y0 = zeros($T, size($A, 1)) h = Translate(SqrNormL2(), -$b) @@ -125,8 +125,8 @@ for (benchmark_name, file_name) in [ end SUITE[k]["SFISTA"] = - @benchmarkable solver(x0=x0, f=f, Lf=Lf, g=g) setup = begin - solver = ProximalAlgorithms.SFISTA(tol=$R(1e-3)) + @benchmarkable solver(x0 = x0, f = f, Lf = Lf, g = g) setup = begin + solver = ProximalAlgorithms.SFISTA(tol = $R(1e-3)) x0 = zeros($T, size($A, 2)) f = LeastSquares($A, $b) g = NormL1($lam) diff --git a/docs/src/examples/sparse_linear_regression.jl b/docs/src/examples/sparse_linear_regression.jl index 8952a86..e087e6a 100644 --- a/docs/src/examples/sparse_linear_regression.jl +++ b/docs/src/examples/sparse_linear_regression.jl @@ -35,8 +35,8 @@ n_training, n_features = size(training_input) using LinearAlgebra using Statistics -input_loc = mean(training_input, dims=1) |> vec -input_scale = std(training_input, dims=1) |> vec +input_loc = mean(training_input, dims = 1) |> vec +input_scale = std(training_input, dims = 1) |> vec linear_model(wb, input) = input * wb[1:end-1] .+ wb[end] @@ -73,7 +73,7 @@ reg = ProximalOperators.NormL1(1) # and the objective terms `f=training_loss` (smooth) and `g=reg` (non smooth). ffb = ProximalAlgorithms.FastForwardBackward() -solution, iterations = ffb(x0=zeros(n_features + 1), f=training_loss, g=reg) +solution, iterations = ffb(x0 = zeros(n_features + 1), f = training_loss, g = reg) # We can now check how well the trained model performs on the test portion of our data. diff --git a/docs/src/guide/custom_objectives.jl b/docs/src/guide/custom_objectives.jl index cf61cc8..dfdee82 100644 --- a/docs/src/guide/custom_objectives.jl +++ b/docs/src/guide/custom_objectives.jl @@ -63,7 +63,7 @@ end # We can now minimize the function, for which we will use [`PANOC`](@ref), which is a Newton-type method: panoc = ProximalAlgorithms.PANOC() -solution, iterations = panoc(x0=-ones(2), f=rosenbrock2D, g=IndUnitBall()) +solution, iterations = panoc(x0 = -ones(2), f = rosenbrock2D, g = IndUnitBall()) # Plotting the solution against the cost function contour and constraint, gives an idea of its correctness. 
@@ -73,17 +73,17 @@ contour( -2:0.1:2, -2:0.1:2, (x, y) -> rosenbrock2D([x, y]), - fill=true, - framestyle=:none, - background=nothing, + fill = true, + framestyle = :none, + background = nothing, ) -plot!(Shape(cos.(0:0.01:2*pi), sin.(0:0.01:2*pi)), opacity=0.5, label="feasible set") +plot!(Shape(cos.(0:0.01:2*pi), sin.(0:0.01:2*pi)), opacity = 0.5, label = "feasible set") scatter!( [solution[1]], [solution[2]], - color=:red, - markershape=:star5, - label="computed solution", + color = :red, + markershape = :star5, + label = "computed solution", ) # ## Example: counting operations @@ -128,7 +128,7 @@ end f = Counting(rosenbrock2D) g = Counting(IndUnitBall()) -solution, iterations = panoc(x0=-ones(2), f=f, g=g) +solution, iterations = panoc(x0 = -ones(2), f = f, g = g) # and check how many operations where actually performed: diff --git a/src/utilities/fb_tools.jl b/src/utilities/fb_tools.jl index 0c88c8a..42c46f3 100644 --- a/src/utilities/fb_tools.jl +++ b/src/utilities/fb_tools.jl @@ -34,10 +34,10 @@ function backtrack_stepsize!( g_z::R, res, Az, - grad_f_Az=nothing; - alpha=R(1), - minimum_gamma=R(1e-7), - reduce_gamma=R(0.5), + grad_f_Az = nothing; + alpha = R(1), + minimum_gamma = R(1e-7), + reduce_gamma = R(0.5), ) where {R} f_Az_upp = f_model(f_Ax, At_grad_f_Ax, res, alpha / gamma) _mul!(Az, A, z) @@ -65,9 +65,9 @@ function backtrack_stepsize!( A, g, x; - alpha=R(1), - minimum_gamma=R(1e-7), - reduce_gamma=R(0.5), + alpha = R(1), + minimum_gamma = R(1e-7), + reduce_gamma = R(0.5), ) where {R} Ax = A * x f_Ax, grad_f_Ax = value_and_gradient(f, Ax) @@ -88,8 +88,8 @@ function backtrack_stepsize!( x - z, Ax, grad_f_Ax; - alpha=alpha, - minimum_gamma=minimum_gamma, - reduce_gamma=reduce_gamma, + alpha = alpha, + minimum_gamma = minimum_gamma, + reduce_gamma = reduce_gamma, ) end diff --git a/test/runtests.jl b/test/runtests.jl index 42b4be9..d61d66e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,7 +16,7 @@ function ProximalAlgorithms.value_and_gradient(f::Quadratic, x) end @testset "Aqua" begin - Aqua.test_all(ProximalAlgorithms; ambiguities=false) + Aqua.test_all(ProximalAlgorithms; ambiguities = false) end include("utilities/test_ad.jl") From 1836ee6bfb8f128c243f538c933f708f3ca41486 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Wed, 17 Jul 2024 10:12:43 +0200 Subject: [PATCH 3/4] Fix backtracking --- src/utilities/fb_tools.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/utilities/fb_tools.jl b/src/utilities/fb_tools.jl index 42c46f3..4058641 100644 --- a/src/utilities/fb_tools.jl +++ b/src/utilities/fb_tools.jl @@ -41,7 +41,7 @@ function backtrack_stepsize!( ) where {R} f_Az_upp = f_model(f_Ax, At_grad_f_Ax, res, alpha / gamma) _mul!(Az, A, z) - f_Az, grad_f_Az = value_and_gradient(f, Az) + f_Az, grad_f_Az_tmp = value_and_gradient(f, Az) tol = 10 * eps(R) * (1 + abs(f_Az)) while f_Az > f_Az_upp + tol && gamma >= minimum_gamma gamma *= reduce_gamma @@ -50,9 +50,12 @@ function backtrack_stepsize!( res .= x .- z f_Az_upp = f_model(f_Ax, At_grad_f_Ax, res, alpha / gamma) _mul!(Az, A, z) - f_Az, grad_f_Az = value_and_gradient(f, Az) + f_Az, grad_f_Az_tmp = value_and_gradient(f, Az) tol = 10 * eps(R) * (1 + abs(f_Az)) end + if grad_f_Az !== nothing + grad_f_Az .= grad_f_Az_tmp + end if gamma < minimum_gamma @warn "stepsize `gamma` became too small ($(gamma))" end From 5db374c37ca61e49fd8e6980c15234963aee6962 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> 
Date: Wed, 24 Jul 2024 21:45:02 +0200 Subject: [PATCH 4/4] Update Project.toml Co-authored-by: Lorenzo Stella --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 41a6248..744eb5b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "ProximalAlgorithms" uuid = "140ffc9f-1907-541a-a177-7475e0a401e9" -version = "0.6.0" +version = "0.7.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
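Since the API renames in this series are breaking for anyone who called the gradient interface or the backend wrappers directly, a hedged before/after sketch of the user-facing change behind the 0.6 → 0.7 version bump above (the `cost` function is illustrative; the 0.6-era lines are kept as comments for comparison only):

```julia
using Zygote
using DifferentiationInterface: AutoZygote
using ProximalAlgorithms

cost(x) = sum(abs2, x) / 2  # some smooth term
x0 = ones(3)

# ProximalAlgorithms 0.6 (AbstractDifferentiation era):
#   using AbstractDifferentiation: ZygoteBackend
#   f = ProximalAlgorithms.AutoDifferentiable(cost, ZygoteBackend())
#   fx, cl = ProximalAlgorithms.value_and_gradient_closure(f, x0)
#   grad_fx = cl()

# ProximalAlgorithms 0.7 (DifferentiationInterface era):
f = ProximalAlgorithms.AutoDifferentiable(cost, AutoZygote())
fx, grad_fx = ProximalAlgorithms.value_and_gradient(f, x0)
```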