diff --git a/docs/Project.toml b/docs/Project.toml
index 0fbc02e..708dd2b 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -9,4 +9,6 @@ ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b"
 ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537"
 
 [compat]
+Documenter = "1"
+DocumenterCitations = "1.2"
 ProximalOperators = "0.15"
diff --git a/docs/make.jl b/docs/make.jl
index ee1ad11..2410c7a 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -22,7 +22,6 @@ for directory in literate_directories
 end
 
 makedocs(
-    bib,
     modules=[ProximalAlgorithms, ProximalCore],
     sitename="ProximalAlgorithms.jl",
     pages=[
@@ -38,6 +37,8 @@ makedocs(
         ],
         "Bibliography" => "bibliography.md",
     ],
+    plugins=[bib],
+    checkdocs=:exported,
 )
 
 deploydocs(
diff --git a/docs/src/examples/sparse_linear_regression.jl b/docs/src/examples/sparse_linear_regression.jl
index 169b96e..9664e3b 100644
--- a/docs/src/examples/sparse_linear_regression.jl
+++ b/docs/src/examples/sparse_linear_regression.jl
@@ -51,7 +51,11 @@ end
 
 mean_squared_error(label, output) = mean((output .- label) .^ 2) / 2
 
-training_loss(wb) = mean_squared_error(training_label, standardized_linear_model(wb, training_input))
+using ProximalAlgorithms
+
+training_loss = ProximalAlgorithms.ZygoteFunction(
+    wb -> mean_squared_error(training_label, standardized_linear_model(wb, training_input))
+)
 
 # As regularization we will use the L1 norm, implemented in [ProximalOperators](https://github.com/JuliaFirstOrder/ProximalOperators.jl):
 
@@ -64,8 +68,6 @@ reg = ProximalOperators.NormL1(1)
 # Therefore we construct the algorithm, then apply it to our problem by providing a starting point,
 # and the objective terms `f=training_loss` (smooth) and `g=reg` (non smooth).
 
-using ProximalAlgorithms
-
 ffb = ProximalAlgorithms.FastForwardBackward()
 solution, iterations = ffb(x0=zeros(n_features + 1), f=training_loss, g=reg)
 
diff --git a/docs/src/guide/custom_objectives.jl b/docs/src/guide/custom_objectives.jl
index 15d2146..56fc3c7 100644
--- a/docs/src/guide/custom_objectives.jl
+++ b/docs/src/guide/custom_objectives.jl
@@ -31,7 +31,11 @@ #
 # Let's try to minimize the celebrated Rosenbrock function, but constrained to the unit norm ball.
 # The cost function is
 
-rosenbrock2D(x) = 100 * (x[2] - x[1]^2)^2 + (1 - x[1])^2
+using ProximalAlgorithms
+
+rosenbrock2D = ProximalAlgorithms.ZygoteFunction(
+    x -> 100 * (x[2] - x[1]^2)^2 + (1 - x[1])^2
+)
 
 # To enforce the constraint, we define the indicator of the unit ball, together with its proximal mapping:
 # this is simply projection onto the unit norm ball, so it is sufficient to normalize any given point that lies
@@ -55,8 +59,6 @@ end
 
 # We can now minimize the function, for which we will use [`PANOC`](@ref), which is a Newton-type method:
 
-using ProximalAlgorithms
-
 panoc = ProximalAlgorithms.PANOC()
 solution, iterations = panoc(x0=-ones(2), f=rosenbrock2D, g=IndUnitBall())
 
diff --git a/docs/src/guide/getting_started.jl b/docs/src/guide/getting_started.jl
index 7329f4a..228df29 100644
--- a/docs/src/guide/getting_started.jl
+++ b/docs/src/guide/getting_started.jl
@@ -54,7 +54,9 @@ using LinearAlgebra
 using ProximalOperators
 using ProximalAlgorithms
 
-quadratic_cost(x) = dot([3.4 1.2; 1.2 4.5] * x, x) / 2 + dot([-2.3, 9.9], x)
+quadratic_cost = ProximalAlgorithms.ZygoteFunction(
+    x -> dot([3.4 1.2; 1.2 4.5] * x, x) / 2 + dot([-2.3, 9.9], x)
+)
 box_indicator = ProximalOperators.IndBox(0, 1)
 
 ffb = ProximalAlgorithms.FastForwardBackward(maxit=1000, tol=1e-5, verbose=true)
diff --git a/docs/src/guide/implemented_algorithms.md b/docs/src/guide/implemented_algorithms.md
index 052d6ad..a297a22 100644
--- a/docs/src/guide/implemented_algorithms.md
+++ b/docs/src/guide/implemented_algorithms.md
@@ -72,7 +72,7 @@ For this reason, specific algorithms by the name of "primal-dual" splitting sche
 Algorithm | Assumptions | Oracle | Implementation | References
 ----------|-------------|--------|----------------|-----------
 Chambolle-Pock | ``f\equiv 0``, ``g, h`` convex, ``L`` linear operator | ``\operatorname{prox}_{\gamma g}``, ``\operatorname{prox}_{\gamma h}``, ``L``, ``L^*`` | [`ChambollePock`](@ref) | [Chambolle2011](@cite)
-Vu-Condat | ``f`` convex and smooth, ``g, h`` convex, ``L`` linear operator | ``\nabla f``, ``\operatorname{prox}_{\gamma g}``, ``\operatorname{prox}_{\gamma h}``, ``L``, ``L^*`` | [`VuCodat`](@ref) | [Vu2013](@cite), [Condat2013](@cite)
+Vu-Condat | ``f`` convex and smooth, ``g, h`` convex, ``L`` linear operator | ``\nabla f``, ``\operatorname{prox}_{\gamma g}``, ``\operatorname{prox}_{\gamma h}``, ``L``, ``L^*`` | [`VuCondat`](@ref) | [Vu2013](@cite), [Condat2013](@cite)
 AFBA | ``f`` convex and smooth, ``g, h`` convex, ``L`` linear operator | ``\nabla f``, ``\operatorname{prox}_{\gamma g}``, ``\operatorname{prox}_{\gamma h}``, ``L``, ``L^*`` | [`AFBA`](@ref) | [Latafat2017](@cite)
 
 ```@docs
diff --git a/src/utilities/ad.jl b/src/utilities/ad.jl
index f14c097..f1e43b7 100644
--- a/src/utilities/ad.jl
+++ b/src/utilities/ad.jl
@@ -1,8 +1,14 @@
 using Zygote: pullback
 using ProximalCore
 
-function ProximalCore.gradient!(grad, f, x)
-    fx, pb = pullback(f, x)
+struct ZygoteFunction{F}
+    f::F
+end
+
+(f::ZygoteFunction)(x) = f.f(x)
+
+function ProximalCore.gradient!(grad, f::ZygoteFunction, x)
+    fx, pb = pullback(f.f, x)
     grad .= pb(one(fx))[1]
     return fx
 end
diff --git a/test/utilities/test_ad.jl b/test/utilities/test_ad.jl
index 8abb327..a4fa27f 100644
--- a/test/utilities/test_ad.jl
+++ b/test/utilities/test_ad.jl
@@ -12,7 +12,9 @@ using ProximalAlgorithms
         -1.0 -1.0 -1.0 1.0 3.0
     ]
     b = T[1.0, 2.0, 3.0, 4.0]
-    f(x) = R(1/2) * norm(A * x - b, 2)^2
+    f = ProximalAlgorithms.ZygoteFunction(
+        x -> R(1/2) * norm(A * x - b, 2)^2
+    )
     Lf = opnorm(A)^2
     m, n = size(A)
 
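
A minimal end-to-end sketch of the new usage pattern introduced by this diff, mirroring the `getting_started.jl` example changed above; the starting point passed to `x0` is illustrative and not part of the diff:

```julia
using LinearAlgebra
using ProximalOperators
using ProximalAlgorithms

# Wrap the smooth term so that ProximalCore.gradient! dispatches to the
# Zygote-based method: with this change, plain callables no longer hit an
# automatic-differentiation fallback and must be wrapped explicitly.
quadratic_cost = ProximalAlgorithms.ZygoteFunction(
    x -> dot([3.4 1.2; 1.2 4.5] * x, x) / 2 + dot([-2.3, 9.9], x)
)
box_indicator = ProximalOperators.IndBox(0, 1)

# Minimize f(x) + g(x) with the fast forward-backward (accelerated proximal gradient) method.
ffb = ProximalAlgorithms.FastForwardBackward(maxit=1000, tol=1e-5, verbose=true)
solution, iterations = ffb(x0=ones(2), f=quadratic_cost, g=box_indicator)
```

Calling `ProximalCore.gradient!(grad, quadratic_cost, x)` then evaluates the wrapped cost and fills `grad` through Zygote's pullback, as implemented in src/utilities/ad.jl above.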