Manifold optimization #435
@@ -0,0 +1,6 @@
# Complex optimization
Optimization of functions defined on complex inputs (C^n to R) is supported by simply passing a complex `x0` as input. All zeroth- and first-order optimization algorithms are supported. For now, only explicit gradients are supported.

The gradient of a complex-to-real function is defined as the unique vector `g` such that `f(x+h) = f(x) + real(g' * h) + O(h^2)`. This is sometimes written `g = df/d(z*) = df/d(re(z)) + i df/d(im(z))`.

The gradient of a C^n-to-R function is a C^n-to-C^n map. Even if it is differentiable when seen as a function from R^2n to R^2n, it is in general not complex-differentiable. For instance, take f(z) = Re(z)^2. Then g(z) = 2 Re(z), which is not complex-differentiable (holomorphic). Therefore, the Hessian of a C^n-to-R function is in general not well-defined as an n x n complex matrix (only as a 2n x 2n real matrix), and second-order optimization algorithms are not directly applicable. To use second-order optimization, convert to real variables.
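To make the calling convention concrete, here is a small sketch (ours, not part of the diff) minimizing `f(z) = ||z - c||^2` with the explicit gradient `g(z) = 2(z - c)` in the convention above; all names are illustrative:
```julia
# Minimize f(z) = ||z - c||^2 over z in C^n; in the convention above,
# the gradient is g(z) = 2(z - c)
n = 4
c = randn(n) + im*randn(n)
f(z) = sum(abs2, z - c)
g!(stor, z) = (stor .= 2 .* (z .- c))
z0 = randn(n) + im*randn(n)   # a complex x0 enables complex optimization
Optim.optimize(f, g!, z0, Optim.ConjugateGradient())
```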
@@ -0,0 +1,37 @@
# Manifold optimization
Optim.jl supports the minimization of functions defined on Riemannian manifolds, i.e. with simple constraints such as normalization and orthogonality. The basic idea of such algorithms is to project back ("retract") each iterate of an unconstrained minimization method onto the manifold. This is used by passing a `manifold` keyword argument to the optimizer.

## Howto
Here is a simple test case where we minimize the Rayleigh quotient `<x, A x>` of a symmetric matrix `A` under the constraint `||x|| = 1`, finding an eigenvector associated with the lowest eigenvalue of `A`.
```julia
n = 10
A = Diagonal(linspace(1, 2, n))
f(x) = vecdot(x, A*x)/2
g(x) = A*x
g!(stor, x) = copy!(stor, g(x))
x0 = randn(n)

manif = Optim.Sphere()
Optim.optimize(f, g!, x0, Optim.ConjugateGradient(manifold=manif))
```
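At the minimum, `x` approaches (up to sign) the first coordinate vector, and `2*f(x)` approaches the smallest eigenvalue of `A`, which is `1` here.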

## Supported solvers and manifolds
All first-order optimization methods are supported.

The following manifolds are currently supported:
* Flat: Euclidean space, the default; standard unconstrained optimization.
* Sphere: the spherical constraint `||x|| = 1`.
* Stiefel: the Stiefel manifold of `N x n` matrices with orthonormal columns, i.e. `X'*X = I`.

The following meta-manifolds construct manifolds out of pre-existing ones:
* PowerManifold: identical copies of a specified manifold.
* ProductManifold: the product of two (potentially different) manifolds.

See `test/multivariate/manifolds.jl` for usage examples; a construction sketch is also shown below.
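As an illustration (ours, not from the diff; the argument layout follows the `PowerManifold` and `ProductManifold` constructors in the source below), the meta-manifolds might be constructed like this:
```julia
# Hypothetical sketch: 4 independent unit vectors of length 10, stored
# contiguously; inner_dims = (10,), outer_dims = (4,)
m_power = Optim.PowerManifold(Optim.Sphere(), (10,), (4,))

# Hypothetical sketch: one unit vector of length 10 followed by a 10x3
# Stiefel point, concatenated into a single flat vector
m_prod = Optim.ProductManifold(Optim.Sphere(), Optim.Stiefel(), (10,), (10, 3))
```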

Implementing a new manifold is as simple as adding the methods `retract!(M::YourManifold, x)` and `project_tangent!(M::YourManifold, g, x)` (see the sketch below). If you implement another manifold or optimization method, please contribute a PR!
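For instance, a hypothetical manifold of vectors of fixed norm `r` (a rescaled sphere, not part of this PR; `FixedNormSphere` is an illustrative name) could be implemented as:
```julia
import Optim: retract!, project_tangent!

# Hypothetical manifold {x : ||x|| = r}
struct FixedNormSphere <: Optim.Manifold
    r::Float64
end
# Map x back onto the manifold by rescaling it to norm r
retract!(M::FixedNormSphere, x) = (x .*= M.r / norm(x))
# Remove the radial component of g: the tangent space at x is {v : real(<x,v>) = 0}
project_tangent!(M::FixedNormSphere, g, x) =
    (g .-= (real(vecdot(x, g)) / real(vecdot(x, x))) .* x)
```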

## References
The Geometry of Algorithms with Orthogonality Constraints, Alan Edelman, Tomás A. Arias, Steven T. Smith, SIAM J. Matrix Anal. Appl., 20(2), 303–353, 1998.

Optimization Algorithms on Matrix Manifolds, P.-A. Absil, R. Mahony, R. Sepulchre, Princeton University Press, 2008.
@@ -0,0 +1,157 @@
# Manifold interface: every manifold (subtype of Manifold) defines the functions
# project_tangent!(m, g, x): project g onto the tangent space to m at x
# retract!(m, x): map x back to a point on the manifold m

# For mathematical references, see e.g.

# The Geometry of Algorithms with Orthogonality Constraints
# Alan Edelman, Tomás A. Arias, and Steven T. Smith
# SIAM J. Matrix Anal. Appl., 20(2), 303–353 (51 pages)

# Optimization Algorithms on Matrix Manifolds
# P.-A. Absil, R. Mahony, R. Sepulchre
# Princeton University Press, 2008

abstract type Manifold end

Review comment: I suggest adding docstrings, at least to all the new types.

# Fake objective function implementing a retraction: function values are
# computed at the retracted point, and gradients are projected onto the
# tangent space
mutable struct ManifoldObjective{T<:NLSolversBase.AbstractObjective} <: NLSolversBase.AbstractObjective
    manifold::Manifold
    inner_obj::T
end
iscomplex(obj::ManifoldObjective) = iscomplex(obj.inner_obj)
# TODO is it safe here to call retract! and change x?
function NLSolversBase.value!(obj::ManifoldObjective, x)
    xin = complex_to_real(obj, retract(obj.manifold, real_to_complex(obj, x)))
    value!(obj.inner_obj, xin)
end
function NLSolversBase.value(obj::ManifoldObjective)
    value(obj.inner_obj)
end
function NLSolversBase.gradient(obj::ManifoldObjective)
    gradient(obj.inner_obj)
end
function NLSolversBase.gradient(obj::ManifoldObjective, i::Int)
    gradient(obj.inner_obj, i)
end
function NLSolversBase.gradient!(obj::ManifoldObjective, x)
    xin = complex_to_real(obj, retract(obj.manifold, real_to_complex(obj, x)))
    gradient!(obj.inner_obj, xin)
    project_tangent!(obj.manifold, real_to_complex(obj, gradient(obj.inner_obj)), real_to_complex(obj, xin))
    return gradient(obj.inner_obj)
end
function NLSolversBase.value_gradient!(obj::ManifoldObjective, x)
    xin = complex_to_real(obj, retract(obj.manifold, real_to_complex(obj, x)))
    value_gradient!(obj.inner_obj, xin)
    project_tangent!(obj.manifold, real_to_complex(obj, gradient(obj.inner_obj)), real_to_complex(obj, xin))
    return value(obj.inner_obj)
end

# Fallbacks for out-of-place operations
project_tangent(M::Manifold, g, x) = project_tangent!(M, copy(g), x)
retract(M::Manifold, x) = retract!(M, copy(x))

# Flat manifold = {R,C}^n
# All the functions below are no-ops, so the code generated for the flat
# manifold should be exactly the same as with all the manifold machinery removed
struct Flat <: Manifold
end
retract(M::Flat, x) = x
retract!(M::Flat, x) = x
project_tangent(M::Flat, g, x) = g
project_tangent!(M::Flat, g, x) = g

# Sphere {x : ||x|| = 1}
struct Sphere <: Manifold
end
retract!(S::Sphere, x) = normalize!(x)
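# The tangent space at x is {v : real(<x, v>) = 0}; the projection below
# removes the radial component of g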
project_tangent!(S::Sphere, g, x) = (g .= g .- real(vecdot(x, g)) .* x)

# Stiefel manifold: N x n matrices with orthonormal columns, i.e. such that X'X = I
# Special cases: N x 1 = sphere, N x N = O(N) / U(N)
abstract type Stiefel <: Manifold end
Review comment: I know it's not done all over the code base, but a simple reference or explanation of what the "Stiefel manifold" is would be nice.

Review comment: That's an important but pretty special manifold, no? What is the justification for having it as part of Optim?

Author reply: Will document it. The main justification is that it is the one I need in my application ;-) More seriously, it's the basic manifold for this kind of algorithm: it was the original motivation for the theory, many other manifolds (sphere, O(n), U(n)) are special cases, it's probably the most used in applications (at least that I know of) outside of the sphere, and it's a good template for implementing other manifolds. There could be a Manifolds package living outside Optim, but it's a pretty short file, so I would think this is fine, and people implementing other manifolds can just PR on Optim?

Review comment: Good point about the special cases. OK, maybe leave this for now and discuss moving outside of Optim only when somebody complains.
# Two types of retraction: SVD is the most stable, CholQR the fastest
struct Stiefel_CholQR <: Stiefel end
struct Stiefel_SVD <: Stiefel end
function Stiefel(retraction=:SVD)
    if retraction == :CholQR
        Stiefel_CholQR()
    elseif retraction == :SVD
        Stiefel_SVD()
    else
        error("Unknown retraction $retraction; use :SVD or :CholQR")
    end
end

function retract!(S::Stiefel_SVD, X)
    U, _, V = svd(X)
    X .= U*V'
end
function retract!(S::Stiefel_CholQR, X)
    overlap = X'X
    X .= X/chol(overlap)
end
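# The tangent space at X consists of directions D with X'D + D'X = 0;
# subtracting X times the symmetric part of X'G projects G onto it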
project_tangent!(S::Stiefel, G, X) = (G .-= X*((X'G .+ G'X)./2))

# Multiple copies of the same manifold. Points are arrays of arbitrary
# dimensions; the first dimensions (given by inner_dims) index points of the
# inner manifold. E.g. a 2x2 grid of Stiefel manifolds of dimension N x n
# would be stored as an N x n x 2 x 2 array
struct PowerManifold <: Manifold
    inner_manifold::Manifold # type of embedded manifold
    inner_dims::Tuple # dimension of the embedded manifolds
    outer_dims::Tuple # number of embedded manifolds
end
function retract!(m::PowerManifold, x)
    for i = 1:prod(m.outer_dims)
        retract!(m.inner_manifold, get_inner(m, x, i))
    end
    x
end
function project_tangent!(m::PowerManifold, g, x)
    for i = 1:prod(m.outer_dims)
        project_tangent!(m.inner_manifold, get_inner(m, g, i), get_inner(m, x, i))
    end
    g
end
# View of the i-th inner point, reshaped to the dimensions of the inner manifold
@inline function get_inner(m::PowerManifold, x, i::Int)
    size_inner = prod(m.inner_dims)
    size_outer = prod(m.outer_dims)
    @assert 1 <= i <= size_outer
    return reshape(view(x, (i-1)*size_inner+1:i*size_inner), m.inner_dims)
end
@inline get_inner(m::PowerManifold, x, i::Tuple) = get_inner(m, x, sub2ind(m.outer_dims, i...))

# Product of two manifolds {P = (x1, x2), x1 ∈ m1, x2 ∈ m2}.
# P is stored as a flat 1D array, with x1 before x2 in memory
struct ProductManifold <: Manifold
    m1::Manifold
    m2::Manifold
    dims1::Tuple
    dims2::Tuple
end
function retract!(m::ProductManifold, x)
    retract!(m.m1, get_inner(m, x, 1))
    retract!(m.m2, get_inner(m, x, 2))
    x
end
function project_tangent!(m::ProductManifold, g, x)
    project_tangent!(m.m1, get_inner(m, g, 1), get_inner(m, x, 1))
    project_tangent!(m.m2, get_inner(m, g, 2), get_inner(m, x, 2))
    g
end
function get_inner(m::ProductManifold, x, i)
    N1 = prod(m.dims1)
    N2 = prod(m.dims2)
    @assert length(x) == N1+N2
    if i == 1
        return reshape(view(x, 1:N1), m.dims1)
    elseif i == 2
        return reshape(view(x, N1+1:N1+N2), m.dims2)
    else
        error("Only two components in a product manifold")
    end
end
Review comment: This is clear to me now. But the sentence still reads as if this were always true, while it is only true in this example (and many others). Maybe you could add the "rarely" somewhere? But I think my original point was valid: if g were complex-differentiable, then you could define the Hessian as a C^{n x n} matrix. You are just arguing that this is "rare", correct? But is it more "rare" than the objective f being complex-differentiable?
I have no experience with this at all, so I will take your word for it - I just want the documentation to be clear. If you can add a reference, that would help as well.
Author reply: I added an "in general".
I don't know how rare it is. Some simple functions do have a C^{n x n} Hessian, e.g. |z|^2 and z' A z; others don't. I have encountered two of the latter in practice: |z|^4 (the nonlinearity in the Gross-Pitaevskii equation) and Im(ln(z)) (used in the computation of Wannier functions).
I don't have a good reference for this. Even the definition of the gradient is somewhat non-standard (even though it's clearly the right thing to do for C^n-to-R optimization). Since half of quantum mechanics is minimizing complex-to-real energies (the other half being perturbation theory), the physics literature is littered with "dE/dz^*" with no details, so this might be covered in a physics textbook somewhere.
I don't have a good reference for this. Even the definition of the gradient is somewhat non-standard (even though it's cleary the right thing to do for C^n to R optimization). Since half of quantum mechanics is minimizing complex-to-real energies (the other half being perturbation), the physics literature is littered with "dE/dz^*" with no details, so this might be covered in a physics textbook somewhere.