diff --git a/tf_quant_finance/experimental/BUILD b/tf_quant_finance/experimental/BUILD index 67020518b..13b16bb96 100644 --- a/tf_quant_finance/experimental/BUILD +++ b/tf_quant_finance/experimental/BUILD @@ -13,6 +13,7 @@ py_library( "//tf_quant_finance/experimental/finite_difference", "//tf_quant_finance/experimental/instruments", "//tf_quant_finance/experimental/lsm_algorithm", + "//tf_quant_finance/experimental/optimizer", ], ) diff --git a/tf_quant_finance/experimental/optimizer/BUILD b/tf_quant_finance/experimental/optimizer/BUILD new file mode 100644 index 000000000..fa7819639 --- /dev/null +++ b/tf_quant_finance/experimental/optimizer/BUILD @@ -0,0 +1,46 @@ +# Optimization methods. + +licenses(["notice"]) + +package(default_visibility = ["//tf_quant_finance:__subpackages__"]) + +py_library( + name = "optimizer", + srcs = ["__init__.py"], + srcs_version = "PY3", + deps = [ + ":levenberg_marquardt", + ], +) + +py_library( + name = "levenberg_marquardt", + srcs = ["levenberg_marquardt.py"], + deps = [ + # attr dep, + # tensorflow dep, + # tensorflow_probability dep, + ], +) + +py_test( + name = "levenberg_marquardt_test", + size = "medium", + timeout = "long", + srcs = ["levenberg_marquardt_test.py"], + python_version = "PY3", + shard_count = 10, + deps = [ + "//tf_quant_finance", + # test util, + # numpy dep, + # tensorflow dep, + ], +) + +filegroup( + name = "docs", + srcs = [ + "README.md", + ], +) diff --git a/tf_quant_finance/experimental/optimizer/__init__.py b/tf_quant_finance/experimental/optimizer/__init__.py new file mode 100644 index 000000000..538b330f2 --- /dev/null +++ b/tf_quant_finance/experimental/optimizer/__init__.py @@ -0,0 +1,30 @@ +# Lint as: python3 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Optimization methods.""" + + +from tf_quant_finance.experimental.optimizer.levenberg_marquardt import ( + lm_fit as levenberg_marquardt_fit, +) + +from tensorflow.python.util.all_util import ( + remove_undocumented, +) # pylint: disable=g-direct-tensorflow-import + +_allowed_symbols = [ + "levenberg_marquardt_fit", +] + +remove_undocumented(__name__, _allowed_symbols) diff --git a/tf_quant_finance/experimental/optimizer/levenberg_marquardt.py b/tf_quant_finance/experimental/optimizer/levenberg_marquardt.py new file mode 100644 index 000000000..8ff3dcd51 --- /dev/null +++ b/tf_quant_finance/experimental/optimizer/levenberg_marquardt.py @@ -0,0 +1,320 @@ +# Lint as: python3 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.

"""The Levenberg-Marquardt optimization algorithm.

References:
  MINPACK Library
  https://www.netlib.org/minpack/lmder.f

  Jorge J. Moré, "The Levenberg-Marquardt algorithm: Implementation and theory"
  https://www.osti.gov/biblio/7256021-levenberg-marquardt-algorithm-implementation-theory

  J. J. Moré, B. S. Garbow, and K. E. Hillstrom, "User Guide for MINPACK-1"
  http://cds.cern.ch/record/126569/files/CM-P00068642.pdf
"""

import collections

import tensorflow.compat.v2 as tf
import tensorflow_probability as tfp


OptimizerResult = collections.namedtuple(
    "OptimizerResult",
    [
        # Scalar boolean tensor indicating whether the minimum
        # was found within tolerance.
        "converged",
        # Scalar boolean tensor indicating whether the search
        # failed to find a suitable step size. In the absence of any
        # constraints on the number of objective evaluations permitted,
        # this value will be the complement of `converged`. However, if
        # there is a constraint and the search stopped due to available
        # evaluations being exhausted, both `failed` and `converged`
        # will be simultaneously False.
        "failed",
        # The number of iterations.
        "num_iterations",
        # The total number of objective evaluations performed.
        "num_objective_evaluations",
        # A tensor containing the last argument value found during the search.
        # If the search converged, then this value is the argmin of the
        # objective function (within some tolerance).
        "position",
        # A tensor containing the value of the objective
        # function at the `position`. If the search
        # converged, then this is the (local) minimum of
        # the objective function.
        "objective_value",
        # A tensor containing the Jacobian, where row `i` is the gradient
        # of the objective with respect to the position at the `i`-th data
        # point.
        "jacobian",
    ],
)


# Internal state of the optimizer.
_OptimizerState = collections.namedtuple(
    "_OptimizerState",
    [
        # Fields from OptimizerResult.
        "converged",
        "failed",
        "num_iterations",
        "num_objective_evaluations",
        # Position (x_k).
        "position",
        # Objective (f_k).
        "objective_value",
        # Jacobian (j_k).
        "jacobian",
        # Levenberg-Marquardt parameter (lm_param_k).
        "lm_parameter",
        # Scaling factors on the position (d_k).
        "scaling_factors",
        # Step size (delta_k).
        "step_size",
        # Reduction ratio (actual / predicted) (rho_k).
        "reduction_ratio",
        # Actual reduction.
        "actual_reduction",
        # Predicted reduction.
        "predicted_reduction",
        # Previous iteration state. If the step was unsuccessful, the position
        # and the Jacobian are reused with a new step size.
        "prev_iteration_state",
    ],
)


def lm_fit(
    value_and_gradients_function,
    x_data,
    y_data,
    initial_position,
    x_tolerance=1e-8,
    f_tolerance=1e-8,
    g_tolerance=1e-8,
    factor=0.1,
    max_iterations=50,
    parallel_iterations=1,
    stopping_condition=None,
    name=None,
):
    """Fits a function to data using the Levenberg-Marquardt algorithm.

    ### References:
    MINPACK Library
    https://www.netlib.org/minpack/lmder.f

    Jorge J. Moré, "The Levenberg-Marquardt algorithm: Implementation and theory"
    https://www.osti.gov/biblio/7256021-levenberg-marquardt-algorithm-implementation-theory

    J. J. Moré, B. S. Garbow, and K. E. Hillstrom, "User Guide for MINPACK-1"
    http://cds.cern.ch/record/126569/files/CM-P00068642.pdf


    ### Usage:
    The following example demonstrates this optimizer attempting to minimize
    the least squares loss objective of a simple quadratic function.
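    The data in the example lie exactly on the quadratic `y = -x**2 + 3*x`,
    so a successful fit recovers the coefficients `[-1.0, 3.0, 0.0]`.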
    ```python
    x_data = np.array([-1.0, 0.0, 1.0])
    y_data = np.array([-4.0, 0.0, 2.0])

    # The model function: a quadratic in `x` with coefficients `p`.
    def quadratic(p, x):
        return p[0] * (x ** 2) + p[1] * x + p[2]

    # `lm_fit` expects a callable that returns both the model value and its
    # gradient with respect to the coefficients.
    def quadratic_value_and_gradient(p, x):
        return tfp.math.value_and_gradient(lambda p_: quadratic(p_, x), p)

    start = tf.constant([1.0, 1.0, 1.0], dtype=tf.float64)
    optim_results = lm_fit(
        quadratic_value_and_gradient, x_data, y_data, initial_position=start,
        f_tolerance=1e-8)

    with tf.compat.v1.Session() as session:
        results = session.run(optim_results)
        # Check that the search converged.
        assert results.converged
        # Check that the argmin is close to the exact solution.
        np.testing.assert_allclose(results.position, [-1.0, 3.0, 0.0])
    ```

    Args:
      value_and_gradients_function: A Python callable that accepts two
        arguments, a real `Tensor` of model parameters and a `Tensor` of
        input values, and returns a tuple of `Tensor`s of real dtype
        containing the model value and its gradient with respect to the
        parameters. The parameter input should be of shape `[..., n]`, where
        `n` is the size of the parameter vector and all others are batching
        dimensions. The first component of the return value should be a real
        `Tensor` of matching shape `[...]`. The second component (the
        gradient) should also be of shape `[..., n]` like the parameter
        input.
      x_data: Real `Tensor` of shape `[..., m]`. The input values of the
        fitted function.
      y_data: Real `Tensor` of shape `[..., m]`. The output values of the
        fitted function.
      initial_position: Real `Tensor` of shape `[..., n]`. The starting
        point, or points when using batching dimensions, of the search
        procedure. At these points the function value and the gradient norm
        should be finite.
      x_tolerance: Scalar `Tensor` of real dtype. If the absolute change in
        the position between one iteration and the next is smaller than this
        number, the algorithm is stopped.
      f_tolerance: Scalar `Tensor` of real dtype. If the relative change in
        the objective value between one iteration and the next is smaller
        than this value, the algorithm is stopped.
      g_tolerance: Scalar `Tensor` of real dtype. If the cosine of the angle
        between the residuals and any column of the Jacobian is at most this
        value in absolute value, the algorithm is stopped.
      factor: Scalar `Tensor` of real dtype. Used in determining the initial
        step bound, which is set to the product of `factor` and the Euclidean
        norm of the scaled position. In most cases `factor` should lie in the
        interval (0.1, 100.0).
      max_iterations: Scalar positive int32 `Tensor`. The maximum number of
        iterations.
      parallel_iterations: Positive integer. The number of iterations allowed
        to run in parallel.
      stopping_condition: (Optional) A Python function that takes as input
        two Boolean tensors of shape `[...]`, and returns a Boolean scalar
        tensor. The input tensors are `converged` and `failed`, indicating
        the current status of each respective batch member; the return value
        states whether the algorithm should stop. The default is
        `tfp.optimizer.converged_all` which only stops when all batch members
        have either converged or failed. An alternative is
        `tfp.optimizer.converged_any` which stops as soon as one batch member
        has converged, or when all have failed.
      name: (Optional) Python str. The name prefixed to the ops created by
        this function. If not supplied, the default name 'minimize' is used.
    Returns:
      optimizer_results: A namedtuple containing the following items:
        converged: Boolean tensor of shape `[...]` indicating for each batch
          member whether the minimum was found within tolerance.
        failed: Boolean tensor of shape `[...]` indicating for each batch
          member whether the search failed to find a suitable step size. In
          the absence of any constraints on the number of objective
          evaluations permitted, this value will be the complement of
          `converged`. However, if there is a constraint and the search
          stopped due to available evaluations being exhausted, both `failed`
          and `converged` will be simultaneously False.
        num_iterations: The number of iterations performed.
        num_objective_evaluations: The total number of objective evaluations
          performed.
        position: A tensor of shape `[..., n]` containing the last argument
          value found during the search from each starting point. If the
          search converged, then this value is the argmin of the objective
          function.
        objective_value: A tensor of shape `[...]` with the value of the
          objective function at the `position`. If the search converged, then
          this is the (local) minimum of the objective function.
        jacobian: A tensor containing the Jacobian of the residuals at the
          `position`, where row `i` is the gradient with respect to the
          position at the `i`-th data point.
    """
    with tf.compat.v1.name_scope(
        name, "minimize", [initial_position, x_tolerance]
    ):
        initial_position = tf.convert_to_tensor(
            value=initial_position, name="initial_position"
        )
        dtype = initial_position.dtype
        x_data = tf.convert_to_tensor(value=x_data, dtype=dtype, name="x_data")
        y_data = tf.convert_to_tensor(value=y_data, dtype=dtype, name="y_data")
        x_tolerance = tf.convert_to_tensor(
            value=x_tolerance, dtype=dtype, name="x_tolerance"
        )
        f_tolerance = tf.convert_to_tensor(
            value=f_tolerance, dtype=dtype, name="f_tolerance"
        )
        g_tolerance = tf.convert_to_tensor(
            value=g_tolerance, dtype=dtype, name="g_tolerance"
        )
        max_iterations = tf.convert_to_tensor(
            value=max_iterations, name="max_iterations"
        )
        stopping_condition = stopping_condition or tfp.optimizer.converged_all

        # Residuals and Jacobian at the starting position.
        f0, j0 = _value_and_jacobian(
            value_and_gradients_function, initial_position, x_data, y_data
        )
        converged = tf.norm(f0, axis=-1) < f_tolerance

        # Initialize the scaling factors.
        d0 = tf.convert_to_tensor([0.0], dtype=dtype)

        # Initialize the step size.
        delta0 = tf.constant(1000.0, dtype=dtype)

        initial_state = _OptimizerState(
            converged=converged,
            failed=tf.zeros_like(converged),  # All false.
            num_iterations=tf.convert_to_tensor(value=0),
            num_objective_evaluations=tf.convert_to_tensor(value=1),
            position=initial_position,
            objective_value=f0,
            jacobian=j0,
            lm_parameter=tf.constant(0.0, dtype=dtype),
            scaling_factors=d0,
            step_size=delta0,
            reduction_ratio=tf.constant(0.0, dtype=dtype),
            actual_reduction=tf.constant(0.0, dtype=dtype),
            predicted_reduction=tf.constant(0.0, dtype=dtype),
            # Placeholder until the main loop tracks the previous iteration.
            prev_iteration_state=True,
        )

        def _cond(state):
            """Continue if iterations remain and stopping condition is not met."""
            return (state.num_iterations < max_iterations) & tf.logical_not(
                stopping_condition(state.converged, state.failed)
            )

        def _body(state):
            """Main optimization loop."""
            # TODO: Main body
            new_state = _OptimizerState(
                converged=state.converged,
                failed=state.failed,
                num_iterations=state.num_iterations + 1,
                num_objective_evaluations=state.num_objective_evaluations + 1,
                position=state.position,
                objective_value=state.objective_value,
                jacobian=state.jacobian,
                lm_parameter=state.lm_parameter,
                scaling_factors=state.scaling_factors,
                step_size=state.step_size,
                reduction_ratio=state.reduction_ratio,
                actual_reduction=state.actual_reduction,
                predicted_reduction=state.predicted_reduction,
                prev_iteration_state=True,
            )
            return (new_state,)

        final_state = tf.while_loop(
            _cond, _body, (initial_state,), parallel_iterations=parallel_iterations
        )[0]

        return OptimizerResult(
            converged=final_state.converged,
            failed=final_state.failed,
            num_iterations=final_state.num_iterations,
            num_objective_evaluations=final_state.num_objective_evaluations,
            position=final_state.position,
            objective_value=final_state.objective_value,
            jacobian=final_state.jacobian,
        )


def _value_and_jacobian(value_and_gradients_function, variables, x_data, y_data):
    """Computes the residuals and the Jacobian matrix over the data points."""
    values = []
    gradients = []

    for idx in range(x_data.shape.as_list()[0]):
        v, g = value_and_gradients_function(variables, x_data[idx])
        values.append(v - y_data[idx])
        gradients.append(g)

    return tf.stack(values), tf.stack(gradients)
diff --git a/tf_quant_finance/experimental/optimizer/levenberg_marquardt_test.py b/tf_quant_finance/experimental/optimizer/levenberg_marquardt_test.py
new file mode 100644
index 000000000..69f9ecb96
--- /dev/null
+++ b/tf_quant_finance/experimental/optimizer/levenberg_marquardt_test.py
@@ -0,0 +1,89 @@
+# Lint as: python3
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Tests for the Levenberg-Marquardt algorithm."""


from functools import partial

import numpy as np
import tensorflow.compat.v2 as tf

import tf_quant_finance as tff
from tensorflow.python.framework import (
    test_util,
)  # pylint: disable=g-direct-tensorflow-import


# Test functions.
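# A quadratic model mirroring the `lm_fit` docstring example; `_quadratic` is
# a hypothetical extra fixture for a future `test_quadratic` case (e.g. data
# generated from y = -x**2 + 3*x), usable once the main optimization loop is
# implemented.
def _quadratic(p, x):
    return p[0] * (x ** 2) + p[1] * x + p[2]
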
def _linear(p, x):
    a = p[0]
    b = p[1]
    return a * x + b


@test_util.run_all_in_graph_and_eager_modes
class LevenbergMarquardtTest(tf.test.TestCase):
    def _check_algorithm(
        self,
        func=None,
        x_data=None,
        y_data=None,
        start_point=None,
        xtol=1e-4,
        ftol=1e-4,
        gtol=1e-4,
        expected_position=None,
    ):
        """Runs algorithm on given test case and verifies result."""
        val_grad_func = lambda p, x: tff.math.value_and_gradient(
            partial(func, x=x), p
        )
        x_data = tf.constant(x_data, dtype=tf.float64)
        y_data = tf.constant(y_data, dtype=tf.float64)
        start_point = tf.constant(start_point, dtype=tf.float64)
        expected_position = np.array(expected_position, dtype=np.float64)

        f_call_ctr = tf.Variable(0, dtype=tf.int32)

        def val_grad_func_with_counter(p, x):
            with tf.compat.v1.control_dependencies(
                [tf.compat.v1.assign_add(f_call_ctr, 1)]
            ):
                return val_grad_func(p, x)

        result = tff.experimental.optimizer.levenberg_marquardt_fit(
            val_grad_func_with_counter,
            x_data=x_data,
            y_data=y_data,
            initial_position=start_point,
            x_tolerance=xtol,
            f_tolerance=ftol,
            g_tolerance=gtol,
            max_iterations=1,
        )  # TODO: Max iterations
        self.evaluate(tf.compat.v1.global_variables_initializer())
        result = self.evaluate(result)
        f_call_ctr = self.evaluate(f_call_ctr)
        # The main optimization loop is still a TODO; once implemented, the
        # result should also be verified against `expected_position`, e.g.
        # with `self.assertArrayNear(result.position, expected_position,
        # 1e-4)`.
        self.assertEqual(result.position.shape, expected_position.shape)

    def test_linear(self):
        self._check_algorithm(
            func=_linear,
            x_data=[-1.0, 0.0, 1.0],
            y_data=[0.0, 1.0, 2.0],
            start_point=[100.0, 100.0],
            expected_position=[1.0, 1.0],
        )


if __name__ == "__main__":
    tf.test.main()