update)
+{
+ GPU_1D_KERNEL_LOOP(n, index.size(0)) {
+ const int p = patches.size(2);
+ const int ix = index[n];
+
+ float d = patches[ix][2][0][0];
+ d = d + update[n];
+ d = (d > 20) ? 1.0 : d;
+ d = max(d, 1e-4);
+
+    for (int i=0; i<p; i++) {
+      for (int j=0; j<p; j++) {
+        patches[ix][2][i][j] = d;
+      }
+    }
+  }
+}
+
+
+__global__ void reprojection_residuals_and_hessian(
+    const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> poses,
+    const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> patches,
+    const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> intrinsics,
+    const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> target,
+    const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> weight,
+    const torch::PackedTensorAccessor32<float,1,torch::RestrictPtrTraits> lmbda,
+    const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> ii,
+    const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> jj,
+    const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> kk,
+    const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> ku,
+    torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> B,
+    torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> E,
+    torch::PackedTensorAccessor32<float,1,torch::RestrictPtrTraits> C,
+    torch::PackedTensorAccessor32<float,1,torch::RestrictPtrTraits> v,
+    torch::PackedTensorAccessor32<float,1,torch::RestrictPtrTraits> u, const int t0)
+{
+
+ __shared__ float fx, fy, cx, cy;
+ if (threadIdx.x == 0) {
+ fx = intrinsics[0][0];
+ fy = intrinsics[0][1];
+ cx = intrinsics[0][2];
+ cy = intrinsics[0][3];
+ }
+
+ __syncthreads();
+
+ GPU_1D_KERNEL_LOOP(n, ii.size(0)) {
+ int k = ku[n];
+ int ix = ii[n];
+ int jx = jj[n];
+ int kx = kk[n];
+
+ float ti[3] = { poses[ix][0], poses[ix][1], poses[ix][2] };
+ float tj[3] = { poses[jx][0], poses[jx][1], poses[jx][2] };
+ float qi[4] = { poses[ix][3], poses[ix][4], poses[ix][5], poses[ix][6] };
+ float qj[4] = { poses[jx][3], poses[jx][4], poses[jx][5], poses[jx][6] };
+
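+    // Back-project the patch center: Xi is the point in frame i in normalized
+    // homogeneous coordinates (x, y, 1, inverse depth); it is then mapped into
+    // frame j through the relative pose (tij, qij).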
+ float Xi[4], Xj[4];
+ Xi[0] = (patches[kx][0][1][1] - cx) / fx;
+ Xi[1] = (patches[kx][1][1][1] - cy) / fy;
+ Xi[2] = 1.0;
+ Xi[3] = patches[kx][2][1][1];
+
+ float tij[3], qij[4];
+ relSE3(ti, qi, tj, qj, tij, qij);
+ actSE3(tij, qij, Xi, Xj);
+
+ const float X = Xj[0];
+ const float Y = Xj[1];
+ const float Z = Xj[2];
+ const float W = Xj[3];
+
+ const float d = (Z >= 0.2) ? 1.0 / Z : 0.0;
+ const float d2 = d * d;
+
+ const float x1 = fx * (X / Z) + cx;
+ const float y1 = fy * (Y / Z) + cy;
+
+ const float rx = target[n][0] - x1;
+ const float ry = target[n][1] - y1;
+
+ const bool in_bounds = (sqrt(rx*rx + ry*ry) < 128) && (Z > 0.2) &&
+ (x1 > -64) && (y1 > -64) && (x1 < 2*cx + 64) && (y1 < 2*cy + 64);
+
+ const float mask = in_bounds ? 1.0 : 0.0;
+
+ ix = ix - t0;
+ jx = jx - t0;
+
+ {
+ const float r = target[n][0] - x1;
+ const float w = mask * weight[n][0];
+
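+      // Jacobians of the x-residual: Jz w.r.t. the patch inverse depth, Jj w.r.t.
+      // the 6-dof pose of frame j (translation xyz, then rotation xyz); adjSE3
+      // maps Jj to the corresponding Jacobian Ji for frame i.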
+ float Jz = fx * (tij[0] * d - tij[2] * (X * d2));
+ float Ji[6], Jj[6] = {fx*W*d, 0, fx*-X*W*d2, fx*-X*Y*d2, fx*(1+X*X*d2), fx*-Y*d};
+
+ adjSE3(tij, qij, Jj, Ji);
+
+ for (int i=0; i<6; i++) {
+ for (int j=0; j<6; j++) {
+ if (ix >= 0)
+ atomicAdd(&B[6*ix+i][6*ix+j], w * Ji[i] * Ji[j]);
+ if (jx >= 0)
+ atomicAdd(&B[6*jx+i][6*jx+j], w * Jj[i] * Jj[j]);
+ if (ix >= 0 && jx >= 0) {
+ atomicAdd(&B[6*ix+i][6*jx+j], -w * Ji[i] * Jj[j]);
+ atomicAdd(&B[6*jx+i][6*ix+j], -w * Jj[i] * Ji[j]);
+ }
+ }
+ }
+
+ for (int i=0; i<6; i++) {
+ if (ix >= 0)
+ atomicAdd(&E[6*ix+i][k], -w * Jz * Ji[i]);
+ if (jx >= 0)
+ atomicAdd(&E[6*jx+i][k], w * Jz * Jj[i]);
+ }
+
+ for (int i=0; i<6; i++) {
+ if (ix >= 0)
+ atomicAdd(&v[6*ix+i], -w * r * Ji[i]);
+ if (jx >= 0)
+ atomicAdd(&v[6*jx+i], w * r * Jj[i]);
+ }
+
+ atomicAdd(&C[k], w * Jz * Jz);
+ atomicAdd(&u[k], w * r * Jz);
+ }
+
+ {
+ const float r = target[n][1] - y1;
+ const float w = mask * weight[n][1];
+
+ float Jz = fy * (tij[1] * d - tij[2] * (Y * d2));
+ float Ji[6], Jj[6] = {0, fy*W*d, fy*-Y*W*d2, fy*(-1-Y*Y*d2), fy*(X*Y*d2), fy*X*d};
+
+ adjSE3(tij, qij, Jj, Ji);
+
+ for (int i=0; i<6; i++) {
+ for (int j=0; j<6; j++) {
+ if (ix >= 0)
+ atomicAdd(&B[6*ix+i][6*ix+j], w * Ji[i] * Ji[j]);
+ if (jx >= 0)
+ atomicAdd(&B[6*jx+i][6*jx+j], w * Jj[i] * Jj[j]);
+ if (ix >= 0 && jx >= 0) {
+ atomicAdd(&B[6*ix+i][6*jx+j], -w * Ji[i] * Jj[j]);
+ atomicAdd(&B[6*jx+i][6*ix+j], -w * Jj[i] * Ji[j]);
+ }
+ }
+ }
+
+ for (int i=0; i<6; i++) {
+ if (ix >= 0)
+ atomicAdd(&E[6*ix+i][k], -w * Jz * Ji[i]);
+ if (jx >= 0)
+ atomicAdd(&E[6*jx+i][k], w * Jz * Jj[i]);
+ }
+
+ for (int i=0; i<6; i++) {
+ if (ix >= 0)
+ atomicAdd(&v[6*ix+i], -w * r * Ji[i]);
+ if (jx >= 0)
+ atomicAdd(&v[6*jx+i], w * r * Jj[i]);
+ }
+
+ atomicAdd(&C[k], w * Jz * Jz);
+ atomicAdd(&u[k], w * r * Jz);
+ }
+ }
+}
+
+
+__global__ void reproject(
+    const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> poses,
+    const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> patches,
+    const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> intrinsics,
+    const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> ii,
+    const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> jj,
+    const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> kk,
+    torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> coords) {
+
+ __shared__ float fx, fy, cx, cy;
+ if (threadIdx.x == 0) {
+ fx = intrinsics[0][0];
+ fy = intrinsics[0][1];
+ cx = intrinsics[0][2];
+ cy = intrinsics[0][3];
+ }
+
+ __syncthreads();
+
+ GPU_1D_KERNEL_LOOP(n, ii.size(0)) {
+ int ix = ii[n];
+ int jx = jj[n];
+ int kx = kk[n];
+
+ float ti[3] = { poses[ix][0], poses[ix][1], poses[ix][2] };
+ float tj[3] = { poses[jx][0], poses[jx][1], poses[jx][2] };
+ float qi[4] = { poses[ix][3], poses[ix][4], poses[ix][5], poses[ix][6] };
+ float qj[4] = { poses[jx][3], poses[jx][4], poses[jx][5], poses[jx][6] };
+
+ float tij[3], qij[4];
+ relSE3(ti, qi, tj, qj, tij, qij);
+
+    float Xi[4], Xj[4];
+    for (int i=0; i<patches.size(2); i++) {
+      for (int j=0; j<patches.size(3); j++) {
+        Xi[0] = (patches[kx][0][i][j] - cx) / fx;
+        Xi[1] = (patches[kx][1][i][j] - cy) / fy;
+        Xi[2] = 1.0;
+        Xi[3] = patches[kx][2][i][j];
+
+        actSE3(tij, qij, Xi, Xj);
+
+        coords[n][0][i][j] = fx * (Xj[0] / Xj[2]) + cx;
+        coords[n][1][i][j] = fy * (Xj[1] / Xj[2]) + cy;
+      }
+    }
+  }
+}
+
+
+std::vector<torch::Tensor> cuda_ba(
+ torch::Tensor poses,
+ torch::Tensor patches,
+ torch::Tensor intrinsics,
+ torch::Tensor target,
+ torch::Tensor weight,
+ torch::Tensor lmbda,
+ torch::Tensor ii,
+ torch::Tensor jj,
+ torch::Tensor kk,
+ const int t0, const int t1, const int iterations)
+{
+
+ auto ktuple = torch::_unique(kk, true, true);
+ torch::Tensor kx = std::get<0>(ktuple);
+ torch::Tensor ku = std::get<1>(ktuple);
+
+ const int N = t1 - t0; // number of poses
+ const int M = kx.size(0); // number of patches
+ const int P = patches.size(3); // patch size
+
+ auto opts = torch::TensorOptions()
+ .dtype(torch::kFloat32).device(torch::kCUDA);
+
+ poses = poses.view({-1, 7});
+ patches = patches.view({-1,3,P,P});
+ intrinsics = intrinsics.view({-1, 4});
+
+ target = target.view({-1, 2});
+ weight = weight.view({-1, 2});
+
+ const int num = ii.size(0);
+ torch::Tensor B = torch::empty({6*N, 6*N}, opts);
+ torch::Tensor E = torch::empty({6*N, 1*M}, opts);
+ torch::Tensor C = torch::empty({M}, opts);
+
+ torch::Tensor v = torch::empty({6*N}, opts);
+ torch::Tensor u = torch::empty({1*M}, opts);
+
+ for (int itr=0; itr < iterations; itr++) {
+
+ B.zero_();
+ E.zero_();
+ C.zero_();
+ v.zero_();
+ u.zero_();
+
+ v = v.view({6*N});
+ u = u.view({1*M});
+
+    reprojection_residuals_and_hessian<<<NUM_BLOCKS(ii.size(0)), NUM_THREADS>>>(
+      poses.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+      patches.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+      intrinsics.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+      target.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+      weight.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+      lmbda.packed_accessor32<float,1,torch::RestrictPtrTraits>(),
+      ii.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+      jj.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+      kk.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+      ku.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+      B.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+      E.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+      C.packed_accessor32<float,1,torch::RestrictPtrTraits>(),
+      v.packed_accessor32<float,1,torch::RestrictPtrTraits>(),
+      u.packed_accessor32<float,1,torch::RestrictPtrTraits>(), t0);
+
+ v = v.view({6*N, 1});
+ u = u.view({1*M, 1});
+
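+    // The depth block C is diagonal, so its damped inverse Q is elementwise.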
+ torch::Tensor Q = 1.0 / (C + lmbda).view({1, M});
+
+ if (t1 - t0 == 0) {
+
+ torch::Tensor Qt = torch::transpose(Q, 0, 1);
+ torch::Tensor dZ = Qt * u;
+
+ dZ = dZ.view({M});
+
+      patch_retr_kernel<<<NUM_BLOCKS(M), NUM_THREADS>>>(
+        kx.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+        patches.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+        dZ.packed_accessor32<float,1,torch::RestrictPtrTraits>());
+
+ }
+
+ else {
+
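+      // Schur complement: eliminate the per-patch depth variables and solve the
+      // reduced 6N x 6N pose system S dX = y, then back-substitute for dZ.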
+ torch::Tensor EQ = E * Q;
+ torch::Tensor Et = torch::transpose(E, 0, 1);
+ torch::Tensor Qt = torch::transpose(Q, 0, 1);
+
+ torch::Tensor S = B - torch::matmul(EQ, Et);
+ torch::Tensor y = v - torch::matmul(EQ, u);
+
+ torch::Tensor I = torch::eye(6*N, opts);
+ S += I * (1e-4 * S + 1.0);
+
+
+ torch::Tensor U = torch::linalg::cholesky(S);
+ torch::Tensor dX = torch::cholesky_solve(y, U);
+ torch::Tensor dZ = Qt * (u - torch::matmul(Et, dX));
+
+ dX = dX.view({N, 6});
+ dZ = dZ.view({M});
+
+      pose_retr_kernel<<<NUM_BLOCKS(N), NUM_THREADS>>>(t0, t1,
+        poses.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+        dX.packed_accessor32<float,2,torch::RestrictPtrTraits>());
+
+      patch_retr_kernel<<<NUM_BLOCKS(M), NUM_THREADS>>>(
+        kx.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+        patches.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+        dZ.packed_accessor32<float,1,torch::RestrictPtrTraits>());
+ }
+ }
+
+ return {};
+}
+
+
+torch::Tensor cuda_reproject(
+ torch::Tensor poses,
+ torch::Tensor patches,
+ torch::Tensor intrinsics,
+ torch::Tensor ii,
+ torch::Tensor jj,
+ torch::Tensor kk)
+{
+
+ const int N = ii.size(0);
+ const int P = patches.size(3); // patch size
+
+ poses = poses.view({-1, 7});
+ patches = patches.view({-1,3,P,P});
+ intrinsics = intrinsics.view({-1, 4});
+
+ auto opts = torch::TensorOptions()
+ .dtype(torch::kFloat32).device(torch::kCUDA);
+
+ torch::Tensor coords = torch::empty({N, 2, P, P}, opts);
+
+  reproject<<<NUM_BLOCKS(N), NUM_THREADS>>>(
+    poses.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+    patches.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    intrinsics.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
+    ii.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+    jj.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+    kk.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
+    coords.packed_accessor32<float,4,torch::RestrictPtrTraits>());
+
+ return coords.view({1, N, 2, P, P});
+
+}
diff --git a/third_party/dpvo_ext/fastba/ba_dpvo.cpp b/third_party/dpvo_ext/fastba/ba_dpvo.cpp
new file mode 100644
index 0000000..3f77eeb
--- /dev/null
+++ b/third_party/dpvo_ext/fastba/ba_dpvo.cpp
@@ -0,0 +1,157 @@
+#include <torch/extension.h>
+#include <vector>
+#include <iostream>
+#include <algorithm>
+#include <unordered_map>
+
+
+std::vector<torch::Tensor> cuda_ba(
+ torch::Tensor poses,
+ torch::Tensor patches,
+ torch::Tensor intrinsics,
+ torch::Tensor target,
+ torch::Tensor weight,
+ torch::Tensor lmbda,
+ torch::Tensor ii,
+ torch::Tensor jj,
+ torch::Tensor kk,
+ int t0, int t1, int iterations);
+
+
+torch::Tensor cuda_reproject(
+ torch::Tensor poses,
+ torch::Tensor patches,
+ torch::Tensor intrinsics,
+ torch::Tensor ii,
+ torch::Tensor jj,
+ torch::Tensor kk);
+
+std::vector<torch::Tensor> bundle_adjust(
+ torch::Tensor poses,
+ torch::Tensor patches,
+ torch::Tensor intrinsics,
+ torch::Tensor target,
+ torch::Tensor weight,
+ torch::Tensor lmbda,
+ torch::Tensor ii,
+ torch::Tensor jj,
+ torch::Tensor kk,
+ int t0, int t1, int iterations) {
+ return cuda_ba(poses, patches, intrinsics, target, weight, lmbda, ii, jj, kk, t0, t1, iterations);
+}
+
+
+torch::Tensor reproject(
+ torch::Tensor poses,
+ torch::Tensor patches,
+ torch::Tensor intrinsics,
+ torch::Tensor ii,
+ torch::Tensor jj,
+ torch::Tensor kk) {
+ return cuda_reproject(poses, patches, intrinsics, ii, jj, kk);
+}
+
+// std::vector<torch::Tensor> neighbors(torch::Tensor ii, torch::Tensor jj)
+// {
+// ii = ii.to(torch::kCPU);
+// jj = jj.to(torch::kCPU);
+//   auto ii_data = ii.accessor<long,1>();
+//   auto jj_data = jj.accessor<long,1>();
+
+//   std::unordered_map<long, std::vector<long>> graph;
+//   std::unordered_map<long, std::vector<long>> index;
+// for (int i=0; i < ii.size(0); i++) {
+// const long ix = ii_data[i];
+// const long jx = jj_data[i];
+// if (graph.find(ix) == graph.end()) {
+//       graph[ix] = std::vector<long>();
+//       index[ix] = std::vector<long>();
+// }
+// graph[ix].push_back(jx);
+// index[ix].push_back( i);
+// }
+
+// auto opts = torch::TensorOptions().dtype(torch::kInt64);
+// torch::Tensor ix = torch::empty({ii.size(0)}, opts);
+// torch::Tensor jx = torch::empty({jj.size(0)}, opts);
+
+//   auto ix_data = ix.accessor<long,1>();
+//   auto jx_data = jx.accessor<long,1>();
+
+//   for (std::pair<long, std::vector<long>> element : graph) {
+//     std::vector<long>& v = graph[element.first];
+//     std::vector<long>& idx = index[element.first];
+
+// std::stable_sort(idx.begin(), idx.end(),
+// [&v](size_t i, size_t j) {return v[i] < v[j];});
+
+// ix_data[idx.front()] = -1;
+// jx_data[idx.back()] = -1;
+
+// for (int i=0; i < idx.size(); i++) {
+// ix_data[idx[i]] = (i > 0) ? idx[i-1] : -1;
+// jx_data[idx[i]] = (i < idx.size() - 1) ? idx[i+1] : -1;
+// }
+// }
+
+// ix = ix.to(torch::kCUDA);
+// jx = jx.to(torch::kCUDA);
+
+// return {ix, jx};
+// }
+
+
+std::vector<torch::Tensor> neighbors(torch::Tensor ii, torch::Tensor jj)
+{
+
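+  // For each edge (ii[n], jj[n]), return the indices of the previous and next
+  // edges that share the same source frame ii[n], ordered by jj (-1 if none).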
+ auto tup = torch::_unique(ii, true, true);
+ torch::Tensor uniq = std::get<0>(tup).to(torch::kCPU);
+ torch::Tensor perm = std::get<1>(tup).to(torch::kCPU);
+
+ jj = jj.to(torch::kCPU);
+  auto jj_accessor = jj.accessor<long,1>();
+
+  auto perm_accessor = perm.accessor<long,1>();
+  std::vector<std::vector<long>> index(uniq.size(0));
+ for (int i=0; i < ii.size(0); i++) {
+ index[perm_accessor[i]].push_back(i);
+ }
+
+ auto opts = torch::TensorOptions().dtype(torch::kInt64);
+ torch::Tensor ix = torch::empty({ii.size(0)}, opts);
+ torch::Tensor jx = torch::empty({ii.size(0)}, opts);
+
+  auto ix_accessor = ix.accessor<long,1>();
+  auto jx_accessor = jx.accessor<long,1>();
+
+  for (int i=0; i<uniq.size(0); i++) {
+    std::vector<long>& idx = index[i];
+ std::stable_sort(idx.begin(), idx.end(),
+ [&jj_accessor](size_t i, size_t j) {return jj_accessor[i] < jj_accessor[j];});
+
+ for (int i=0; i < idx.size(); i++) {
+ ix_accessor[idx[i]] = (i > 0) ? idx[i-1] : -1;
+ jx_accessor[idx[i]] = (i < idx.size() - 1) ? idx[i+1] : -1;
+ }
+ }
+
+  // for (int i=0; i<ii.size(0); i++) {
+  //   if (ix_accessor[i] >= 0) std::cout << jj_accessor[ix_accessor[i]] << " ";
+ // if (jx_accessor[i] >= 0) std::cout << jj_accessor[jx_accessor[i]] << " ";
+ // std::cout << std::endl;
+ // }
+
+ ix = ix.to(torch::kCUDA);
+ jx = jx.to(torch::kCUDA);
+
+ return {ix, jx};
+}
+
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("forward", &bundle_adjust, "bundle_adjust_dpvo forward operator");
+  m.def("neighbors", &neighbors, "temporal neighbor indices");
+  m.def("reproject", &reproject, "patch reprojection operator");
+
+}
diff --git a/third_party/dpvo_ext/fastba/ba_dpvo.py b/third_party/dpvo_ext/fastba/ba_dpvo.py
new file mode 100644
index 0000000..6672773
--- /dev/null
+++ b/third_party/dpvo_ext/fastba/ba_dpvo.py
@@ -0,0 +1,20 @@
+import cuda_ba
+
+neighbors = cuda_ba.neighbors
+reproject = cuda_ba.reproject
+
+
+def bundle_adjust_dpvo(poses,
+ patches,
+ intrinsics,
+ target,
+ weight,
+ lmbda,
+ ii,
+ jj,
+ kk,
+ t0,
+ t1,
+ iterations=2):
+ return cuda_ba.forward(poses.data, patches, intrinsics, target, weight,
+ lmbda, ii, jj, kk, t0, t1, iterations)
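+
+
+# Minimal usage sketch (illustrative only; the tensor shapes below are
+# assumptions for a single batch, not an API contract):
+#
+#   poses:      (1, N, 7)  [tx, ty, tz, qx, qy, qz, qw] per frame
+#   patches:    (1, M, 3, p, p) with channel 2 holding inverse depth
+#   intrinsics: (1, 4)     [fx, fy, cx, cy]
+#   target, weight: (1, E, 2);  ii, jj, kk: (E,) edge index tensors
+#
+#   bundle_adjust_dpvo(poses, patches, intrinsics, target, weight,
+#                      lmbda, ii, jj, kk, t0, t1, iterations=2)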
diff --git a/third_party/dpvo_ext/lietorch/__init__.py b/third_party/dpvo_ext/lietorch/__init__.py
new file mode 100644
index 0000000..dee9cba
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/__init__.py
@@ -0,0 +1,2 @@
+__all__ = ['groups']
+from .groups import SE3, SO3, LieGroupParameter, RxSO3, Sim3, cat, stack
diff --git a/third_party/dpvo_ext/lietorch/broadcasting.py b/third_party/dpvo_ext/lietorch/broadcasting.py
new file mode 100644
index 0000000..1685201
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/broadcasting.py
@@ -0,0 +1,33 @@
+import numpy as np
+import torch
+
+
+def check_broadcastable(x, y):
+ assert len(x.shape) == len(y.shape)
+ for (n, m) in zip(x.shape[:-1], y.shape[:-1]):
+ assert n == m or n == 1 or m == 1
+
+
+def broadcast_inputs(x, y):
+ """Automatic broadcasting of missing dimensions."""
+ if y is None:
+ xs, xd = x.shape[:-1], x.shape[-1]
+ return (x.view(-1, xd).contiguous(), ), x.shape[:-1]
+
+ check_broadcastable(x, y)
+
+ xs, xd = x.shape[:-1], x.shape[-1]
+ ys, yd = y.shape[:-1], y.shape[-1]
+ out_shape = [max(n, m) for (n, m) in zip(xs, ys)]
+
+    if x.shape[:-1] == y.shape[:-1]:
+ x1 = x.view(-1, xd)
+ y1 = y.view(-1, yd)
+
+ else:
+ x_expand = [m if n == 1 else 1 for (n, m) in zip(xs, ys)]
+ y_expand = [n if m == 1 else 1 for (n, m) in zip(xs, ys)]
+ x1 = x.repeat(x_expand + [1]).reshape(-1, xd).contiguous()
+ y1 = y.repeat(y_expand + [1]).reshape(-1, yd).contiguous()
+
+ return (x1, y1), tuple(out_shape)
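+
+
+# Illustrative example (assumed shapes, not part of the API): broadcasting one
+# group element (embedded dimension 7) against a batch of four.
+#
+#   x = torch.randn(1, 7)
+#   y = torch.randn(4, 7)
+#   (x1, y1), out_shape = broadcast_inputs(x, y)
+#   # x1.shape == (4, 7), y1.shape == (4, 7), out_shape == (4,)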
diff --git a/third_party/dpvo_ext/lietorch/gradcheck.py b/third_party/dpvo_ext/lietorch/gradcheck.py
new file mode 100644
index 0000000..610a7ca
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/gradcheck.py
@@ -0,0 +1,674 @@
+import torch
+
+TORCH_MAJOR = int(torch.__version__.split('.')[0])
+TORCH_MINOR = int(torch.__version__.split('.')[1])
+
+from torch.types import _TensorOrTensors
+
+if TORCH_MAJOR == 1 and TORCH_MINOR < 8:
+ from torch._six import container_abcs, istuple
+else:
+ import collections.abc as container_abcs
+
+import warnings
+from itertools import product
+from typing import Callable, Iterable, List, Optional, Union
+
+import torch.testing
+from torch.overrides import is_tensor_like
+
+
+def zero_gradients(x):
+ if isinstance(x, torch.Tensor):
+ if x.grad is not None:
+ x.grad.detach_()
+ x.grad.zero_()
+ elif isinstance(x, container_abcs.Iterable):
+ for elem in x:
+ zero_gradients(elem)
+
+
+def make_jacobian(input, num_out):
+ if is_tensor_like(input):
+ if not input.is_floating_point() and not input.is_complex():
+ return None
+ if not input.requires_grad:
+ return None
+ return input.new_zeros((input.nelement(), num_out),
+ dtype=input.dtype,
+ layout=torch.strided)
+ elif isinstance(input,
+ container_abcs.Iterable) and not isinstance(input, str):
+ jacobians = list(
+ filter(lambda x: x is not None,
+ (make_jacobian(elem, num_out) for elem in input)))
+ if not jacobians:
+ return None
+ return type(input)(jacobians) # type: ignore
+ else:
+ return None
+
+
+def iter_tensors(x: Union[torch.Tensor, Iterable[torch.Tensor]],
+ only_requiring_grad: bool = False) -> Iterable[torch.Tensor]:
+ if is_tensor_like(x):
+ # mypy doesn't narrow type of `x` to torch.Tensor
+ if x.requires_grad or not only_requiring_grad: # type: ignore
+ yield x # type: ignore
+ elif isinstance(x, container_abcs.Iterable) and not isinstance(x, str):
+ for elem in x:
+ for result in iter_tensors(elem, only_requiring_grad):
+ yield result
+
+
+def get_numerical_jacobian(fn, input, target=None, eps=1e-3, grad_out=1.0):
+ """
+ input: input to `fn`
+ target: the Tensors wrt whom Jacobians are calculated (default=`input`)
+ grad_out: grad output value used to calculate gradients.
+
+ Note that `target` may not even be part of `input` to `fn`, so please be
+ **very careful** in this to not clone `target`.
+ """
+ if target is None:
+ target = input
+ output_size = fn(input).numel()
+ jacobian = make_jacobian(target, output_size)
+
+ # It's much easier to iterate over flattened lists of tensors.
+ # These are reference to the same objects in jacobian, so any changes
+ # will be reflected in it as well.
+ x_tensors = iter_tensors(target, True)
+ j_tensors = iter_tensors(jacobian)
+
+ def update_jacobians(x, idx, d, d_idx, is_mkldnn=False):
+
+ # compute_jacobian only works for pure real
+ # or pure imaginary delta
+ def compute_gradient(delta):
+ # we currently assume that the norm of delta equals eps
+ assert (delta == eps or delta == (eps * 1j))
+
+ def fn_out():
+ if not is_mkldnn:
+ # x is a view into input and so this works
+ return fn(input).clone()
+ else:
+ # convert the dense tensor back to have mkldnn layout
+ return fn([x.to_mkldnn()])
+
+ orig = x[idx].item()
+ x[idx] = orig - delta
+ outa = fn_out()
+ x[idx] = orig + delta
+ outb = fn_out()
+ x[idx] = orig
+ r = (outb - outa) / (2 * eps)
+ return r.detach().reshape(-1)
+
+ # for details on the algorithm used here, refer:
+ # Section 3.5.3 https://arxiv.org/pdf/1701.00392.pdf
+ # s = fn(z) where z = x for real valued input
+ # and z = x + yj for complex valued input
+ ds_dx = compute_gradient(eps)
+ if x.is_complex(): # C -> C, C -> R
+ ds_dy = compute_gradient(eps * 1j)
+ # conjugate wirtinger derivative
+ conj_w_d = 0.5 * (ds_dx + ds_dy * 1j)
+ # wirtinger derivative
+ w_d = 0.5 * (ds_dx - ds_dy * 1j)
+ d[d_idx] = grad_out.conjugate() * conj_w_d + grad_out * w_d.conj()
+ elif ds_dx.is_complex(): # R -> C
+ # w_d = conj_w_d = 0.5 * ds_dx
+ # dL_dz_conj = 0.5 * [grad_out.conj() * ds_dx + grad_out * ds_dx.conj()]
+ # = 0.5 * [grad_out.conj() * ds_dx + (grad_out.conj() * ds_dx).conj()]
+ # = 0.5 * 2 * real(grad_out.conj() * ds_dx)
+ # = real(grad_out.conj() * ds_dx)
+ d[d_idx] = torch.real(grad_out.conjugate() * ds_dx)
+ else: # R -> R
+ d[d_idx] = ds_dx * grad_out
+
+ # TODO: compare structure
+ for x_tensor, d_tensor in zip(x_tensors, j_tensors):
+ if x_tensor.is_sparse:
+
+ def get_stride(size):
+ dim = len(size)
+ tmp = 1
+ stride = [0] * dim
+ for i in reversed(range(dim)):
+ stride[i] = tmp
+ tmp *= size[i]
+ return stride
+
+ x_nnz = x_tensor._nnz()
+ x_size = list(x_tensor.size())
+ x_indices = x_tensor._indices().t()
+ x_values = x_tensor._values()
+ x_stride = get_stride(x_size)
+
+ # Use .data here to get around the version check
+ x_values = x_values.data
+
+ for i in range(x_nnz):
+ x_value = x_values[i]
+ for x_idx in product(*[range(m) for m in x_values.size()[1:]]):
+ indices = x_indices[i].tolist() + list(x_idx)
+ d_idx = sum(indices[k] * x_stride[k]
+ for k in range(len(x_size)))
+ update_jacobians(x_value, x_idx, d_tensor, d_idx)
+ elif x_tensor.layout == torch._mkldnn: # type: ignore
+ # Use .data here to get around the version check
+ x_tensor = x_tensor.data
+ if len(input) != 1:
+ raise ValueError(
+ 'gradcheck currently only supports functions with 1 input, but got: ',
+ len(input))
+ for d_idx, x_idx in enumerate(
+ product(*[range(m) for m in x_tensor.size()])):
+ # this is really inefficient, but without indexing implemented, there's
+ # not really a better way than converting back and forth
+ x_tensor_dense = x_tensor.to_dense()
+ update_jacobians(x_tensor_dense,
+ x_idx,
+ d_tensor,
+ d_idx,
+ is_mkldnn=True)
+ else:
+ # Use .data here to get around the version check
+ x_tensor = x_tensor.data
+ for d_idx, x_idx in enumerate(
+ product(*[range(m) for m in x_tensor.size()])):
+ update_jacobians(x_tensor, x_idx, d_tensor, d_idx)
+
+ return jacobian
+
+
+def get_analytical_jacobian(input, output, nondet_tol=0.0, grad_out=1.0):
+ # it is easier to call to_dense() on the sparse output than
+ # to modify analytical jacobian
+ if output.is_sparse:
+ raise ValueError(
+ 'Sparse output is not supported at gradcheck yet. '
+ 'Please call to_dense() on the output of fn for gradcheck.')
+ if output.layout == torch._mkldnn: # type: ignore
+ raise ValueError(
+ 'MKLDNN output is not supported at gradcheck yet. '
+ 'Please call to_dense() on the output of fn for gradcheck.')
+ diff_input_list = list(iter_tensors(input, True))
+ jacobian = make_jacobian(input, output.numel())
+ jacobian_reentrant = make_jacobian(input, output.numel())
+ grad_output = torch.zeros_like(
+ output, memory_format=torch.legacy_contiguous_format)
+ flat_grad_output = grad_output.view(-1)
+ reentrant = True
+ correct_grad_sizes = True
+ correct_grad_types = True
+
+ for i in range(flat_grad_output.numel()):
+ flat_grad_output.zero_()
+ flat_grad_output[i] = grad_out
+ for jacobian_c in (jacobian, jacobian_reentrant):
+ grads_input = torch.autograd.grad(output,
+ diff_input_list,
+ grad_output,
+ retain_graph=True,
+ allow_unused=True)
+ for jacobian_x, d_x, x in zip(jacobian_c, grads_input,
+ diff_input_list):
+ if d_x is not None and d_x.size() != x.size():
+ correct_grad_sizes = False
+ elif d_x is not None and d_x.dtype != x.dtype:
+ correct_grad_types = False
+ elif jacobian_x.numel() != 0:
+ if d_x is None:
+ jacobian_x[:, i].zero_()
+ else:
+ d_x_dense = d_x.to_dense(
+ ) if not d_x.layout == torch.strided else d_x
+ assert jacobian_x[:, i].numel() == d_x_dense.numel()
+ jacobian_x[:, i] = d_x_dense.contiguous().view(-1)
+
+ for jacobian_x, jacobian_reentrant_x in zip(jacobian, jacobian_reentrant):
+ if jacobian_x.numel() != 0 and (
+ jacobian_x - jacobian_reentrant_x).abs().max() > nondet_tol:
+ reentrant = False
+
+ return jacobian, reentrant, correct_grad_sizes, correct_grad_types
+
+
+def _as_tuple(x):
+ if TORCH_MAJOR == 1 and TORCH_MINOR < 8:
+ b_tuple = istuple(x)
+ else:
+ b_tuple = isinstance(x, tuple)
+
+ if b_tuple:
+ return x
+ elif isinstance(x, list):
+ return tuple(x)
+ else:
+ return x,
+
+
+def _differentiable_outputs(x):
+ return tuple(o for o in _as_tuple(x) if o.requires_grad)
+
+
+# Note [VarArg of Tensors]
+# ~~~~~~~~~~~~~~~~~~~~~~~~
+# 'func' accepts a vararg of tensors, which isn't expressible in the type system at the moment.
+# If https://mypy.readthedocs.io/en/latest/additional_features.html?highlight=callable#extended-callable-types is accepted,
+# the '...' first argument of Callable can be replaced with VarArg(Tensor).
+# For now, we permit any input.
+
+
+def gradcheck(
+ func: Callable[
+ ..., Union[_TensorOrTensors]], # See Note [VarArg of Tensors]
+ inputs: _TensorOrTensors,
+ eps: float = 1e-6,
+ atol: float = 1e-5,
+ rtol: float = 1e-3,
+ raise_exception: bool = True,
+ check_sparse_nnz: bool = False,
+ nondet_tol: float = 0.0,
+ check_undefined_grad: bool = True,
+ check_grad_dtypes: bool = False) -> bool:
+ r"""Check gradients computed via small finite differences against analytical
+ gradients w.r.t. tensors in :attr:`inputs` that are of floating point or complex type
+ and with ``requires_grad=True``.
+
+ The check between numerical and analytical gradients uses :func:`~torch.allclose`.
+
+ For complex functions, no notion of Jacobian exists. Gradcheck verifies if the numerical and
+ analytical values of Wirtinger and Conjugate Wirtinger derivative are consistent. The gradient
+ computation is done under the assumption that the overall function has a real valued output.
+ For functions with complex output, gradcheck compares the numerical and analytical gradients
+ for two values of :attr:`grad_output`: 1 and 1j. For more details, check out
+ :ref:`complex_autograd-doc`.
+
+ .. note::
+ The default values are designed for :attr:`input` of double precision.
+ This check will likely fail if :attr:`input` is of less precision, e.g.,
+ ``FloatTensor``.
+
+ .. warning::
+ If any checked tensor in :attr:`input` has overlapping memory, i.e.,
+ different indices pointing to the same memory address (e.g., from
+ :func:`torch.expand`), this check will likely fail because the numerical
+ gradients computed by point perturbation at such indices will change
+ values at all other indices that share the same memory address.
+
+ Args:
+ func (function): a Python function that takes Tensor inputs and returns
+ a Tensor or a tuple of Tensors
+ inputs (tuple of Tensor or Tensor): inputs to the function
+ eps (float, optional): perturbation for finite differences
+ atol (float, optional): absolute tolerance
+ rtol (float, optional): relative tolerance
+ raise_exception (bool, optional): indicating whether to raise an exception if
+ the check fails. The exception gives more information about the
+ exact nature of the failure. This is helpful when debugging gradchecks.
+ check_sparse_nnz (bool, optional): if True, gradcheck allows for SparseTensor input,
+ and for any SparseTensor at input, gradcheck will perform check at nnz positions only.
+ nondet_tol (float, optional): tolerance for non-determinism. When running
+ identical inputs through the differentiation, the results must either match
+ exactly (default, 0.0) or be within this tolerance.
+        check_undefined_grad (bool, optional): if True, check if undefined output grads
+ are supported and treated as zeros, for ``Tensor`` outputs.
+
+ Returns:
+ True if all differences satisfy allclose condition
+ """
+ def fail_test(msg):
+ if raise_exception:
+ raise RuntimeError(msg)
+ return False
+
+ tupled_inputs = _as_tuple(inputs)
+ if not check_sparse_nnz and any(
+ t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)):
+ return fail_test(
+ 'gradcheck expects all tensor inputs are dense when check_sparse_nnz is set to False.'
+ )
+
+ # Make sure that gradients are saved for at least one input
+ any_input_requiring_grad = False
+ for idx, inp in enumerate(tupled_inputs):
+ if is_tensor_like(inp) and inp.requires_grad:
+ if not (inp.dtype == torch.float64
+ or inp.dtype == torch.complex128):
+ warnings.warn(
+ f'Input #{idx} requires gradient and '
+ 'is not a double precision floating point or complex. '
+ 'This check will likely fail if all the inputs are '
+ 'not of double precision floating point or complex. ')
+ content = inp._values() if inp.is_sparse else inp
+ # TODO: To cover more problematic cases, replace stride = 0 check with
+ # "any overlap in memory" once we have a proper function to check it.
+ if content.layout is not torch._mkldnn: # type: ignore
+ if not all(
+ st > 0 or sz <= 1
+ for st, sz in zip(content.stride(), content.size())):
+ raise RuntimeError(
+ 'The {}th input has a dimension with stride 0. gradcheck only '
+ 'supports inputs that are non-overlapping to be able to '
+ 'compute the numerical gradients correctly. You should call '
+ '.contiguous on the input before passing it to gradcheck.'
+ )
+ any_input_requiring_grad = True
+ inp.retain_grad()
+ if not any_input_requiring_grad:
+ raise ValueError(
+ 'gradcheck expects at least one input tensor to require gradient, '
+ 'but none of the them have requires_grad=True.')
+
+ func_out = func(*tupled_inputs)
+ output = _differentiable_outputs(func_out)
+
+ if not output:
+ for i, o in enumerate(func_out):
+
+ def fn(input):
+ return _as_tuple(func(*input))[i]
+
+ numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)
+ for n in numerical:
+ if torch.ne(n, 0).sum() > 0:
+ return fail_test(
+ 'Numerical gradient for function expected to be zero')
+ return True
+
+ for i, o in enumerate(output):
+ if not o.requires_grad:
+ continue
+
+ def fn(input):
+ return _as_tuple(func(*input))[i]
+
+ analytical, reentrant, correct_grad_sizes, correct_grad_types = get_analytical_jacobian(
+ tupled_inputs, o, nondet_tol=nondet_tol)
+ numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)
+
+ return analytical, numerical
+
+ out_is_complex = o.is_complex()
+
+ if out_is_complex:
+ # analytical vjp with grad_out = 1.0j
+ analytical_with_imag_grad_out, reentrant_with_imag_grad_out, \
+ correct_grad_sizes_with_imag_grad_out, correct_grad_types_with_imag_grad_out \
+ = get_analytical_jacobian(tupled_inputs, o, nondet_tol=nondet_tol, grad_out=1j)
+ numerical_with_imag_grad_out = get_numerical_jacobian(
+ fn, tupled_inputs, eps=eps, grad_out=1j)
+
+ if not correct_grad_types and check_grad_dtypes:
+ return fail_test('Gradient has dtype mismatch')
+
+ if out_is_complex and not correct_grad_types_with_imag_grad_out and check_grad_dtypes:
+ return fail_test(
+ 'Gradient (calculated using complex valued grad output) has dtype mismatch'
+ )
+
+ if not correct_grad_sizes:
+ return fail_test('Analytical gradient has incorrect size')
+
+ if out_is_complex and not correct_grad_sizes_with_imag_grad_out:
+ return fail_test(
+ 'Analytical gradient (calculated using complex valued grad output) has incorrect size'
+ )
+
+ def checkIfNumericalAnalyticAreClose(a, n, j, error_str=''):
+ if not torch.allclose(a, n, rtol, atol):
+ return fail_test(
+ error_str +
+ 'Jacobian mismatch for output %d with respect to input %d,\n'
+ 'numerical:%s\nanalytical:%s\n' % (i, j, n, a))
+
+ inp_tensors = iter_tensors(tupled_inputs, True)
+
+ for j, (a, n, inp) in enumerate(zip(analytical, numerical,
+ inp_tensors)):
+ if a.numel() != 0 or n.numel() != 0:
+ if o.is_complex():
+ # C -> C, R -> C
+ a_with_imag_grad_out = analytical_with_imag_grad_out[j]
+ n_with_imag_grad_out = numerical_with_imag_grad_out[j]
+ checkIfNumericalAnalyticAreClose(
+ a_with_imag_grad_out, n_with_imag_grad_out, j,
+ 'Gradients failed to compare equal for grad output = 1j. '
+ )
+ if inp.is_complex():
+ # C -> R, C -> C
+ checkIfNumericalAnalyticAreClose(
+ a, n, j,
+ 'Gradients failed to compare equal for grad output = 1. '
+ )
+ else:
+ # R -> R, R -> C
+ checkIfNumericalAnalyticAreClose(a, n, j)
+
+ def not_reentrant_error(error_str=''):
+ error_msg = 'Backward' + error_str + ' is not reentrant, i.e., running backward with same \
+ input and grad_output multiple times gives different values, \
+ although analytical gradient matches numerical gradient. \
+ The tolerance for nondeterminism was {}.'.format(
+ nondet_tol)
+ return fail_test(error_msg)
+
+ if not reentrant:
+ return not_reentrant_error()
+
+ if out_is_complex and not reentrant_with_imag_grad_out:
+ return not_reentrant_error(
+ ' (calculated using complex valued grad output)')
+
+ # check if the backward multiplies by grad_output
+ output = _differentiable_outputs(func(*tupled_inputs))
+ if any([o.requires_grad for o in output]):
+ diff_input_list: List[torch.Tensor] = list(
+ iter_tensors(tupled_inputs, True))
+ if not diff_input_list:
+ raise RuntimeError('no Tensors requiring grad found in input')
+ grads_input = torch.autograd.grad(
+ output,
+ diff_input_list, [
+ torch.zeros_like(o,
+ memory_format=torch.legacy_contiguous_format)
+ for o in output
+ ],
+ allow_unused=True)
+ for gi, di in zip(grads_input, diff_input_list):
+ if gi is None:
+ continue
+ if isinstance(gi, torch.Tensor) and gi.layout != torch.strided:
+ if gi.layout != di.layout:
+ return fail_test('grad is incorrect layout (' +
+ str(gi.layout) + ' is not ' +
+ str(di.layout) + ')')
+ if gi.layout == torch.sparse_coo:
+ if gi.sparse_dim() != di.sparse_dim():
+ return fail_test(
+ 'grad is sparse tensor, but has incorrect sparse_dim'
+ )
+ if gi.dense_dim() != di.dense_dim():
+ return fail_test(
+ 'grad is sparse tensor, but has incorrect dense_dim'
+ )
+ gi = gi.to_dense()
+ di = di.to_dense()
+ if not gi.eq(0).all():
+ return fail_test('backward not multiplied by grad_output')
+ if gi.dtype != di.dtype or gi.device != di.device or gi.is_sparse != di.is_sparse:
+ return fail_test('grad is incorrect type')
+ if gi.size() != di.size():
+ return fail_test('grad is incorrect size')
+
+ if check_undefined_grad:
+
+ def warn_bc_breaking():
+ warnings.warn((
+ 'Backwards compatibility: New undefined gradient support checking '
+ 'feature is enabled by default, but it may break existing callers '
+ 'of this function. If this is true for you, you can call this '
+ 'function with "check_undefined_grad=False" to disable the feature'
+ ))
+
+ def check_undefined_grad_support(output_to_check):
+ grads_output = [
+ torch.zeros_like(
+ o, memory_format=torch.legacy_contiguous_format)
+ for o in output_to_check
+ ]
+ try:
+ grads_input = torch.autograd.grad(output_to_check,
+ diff_input_list,
+ grads_output,
+ allow_unused=True)
+ except RuntimeError:
+ warn_bc_breaking()
+ return fail_test((
+ 'Expected backward function to handle undefined output grads. '
+ 'Please look at "Notes about undefined output gradients" in '
+ '"tools/autograd/derivatives.yaml"'))
+
+ for gi, i in zip(grads_input, diff_input_list):
+ if (gi is not None) and (not gi.eq(0).all()):
+ warn_bc_breaking()
+ return fail_test((
+ 'Expected all input grads to be undefined or zero when all output grads are undefined '
+ 'or zero. Please look at "Notes about undefined output gradients" in '
+ '"tools/autograd/derivatives.yaml"'))
+ return True
+
+ # All backward functions must work properly if all output grads are undefined
+ outputs_to_check = [[
+ torch._C._functions.UndefinedGrad()(o)
+ for o in _differentiable_outputs(func(*tupled_inputs))
+ # This check filters out Tensor-likes that aren't instances of Tensor.
+ if isinstance(o, torch.Tensor)
+ ]]
+
+ # If there are multiple output grads, we should be able to undef one at a time without error
+ if len(outputs_to_check[0]) > 1:
+ for undef_grad_idx in range(len(output)):
+ output_to_check = _differentiable_outputs(
+ func(*tupled_inputs))
+ outputs_to_check.append([
+ torch._C._functions.UndefinedGrad()(o)
+ if idx == undef_grad_idx else o
+ for idx, o in enumerate(output_to_check)
+ ])
+
+ for output_to_check in outputs_to_check:
+ if not check_undefined_grad_support(output_to_check):
+ return False
+
+ return True
+
+
+def gradgradcheck(
+ func: Callable[..., _TensorOrTensors], # See Note [VarArg of Tensors]
+ inputs: _TensorOrTensors,
+ grad_outputs: Optional[_TensorOrTensors] = None,
+ eps: float = 1e-6,
+ atol: float = 1e-5,
+ rtol: float = 1e-3,
+ gen_non_contig_grad_outputs: bool = False,
+ raise_exception: bool = True,
+ nondet_tol: float = 0.0,
+ check_undefined_grad: bool = True,
+ check_grad_dtypes: bool = False) -> bool:
+ r"""Check gradients of gradients computed via small finite differences
+ against analytical gradients w.r.t. tensors in :attr:`inputs` and
+ :attr:`grad_outputs` that are of floating point or complex type and with
+ ``requires_grad=True``.
+
+ This function checks that backpropagating through the gradients computed
+ to the given :attr:`grad_outputs` are correct.
+
+ The check between numerical and analytical gradients uses :func:`~torch.allclose`.
+
+ .. note::
+ The default values are designed for :attr:`input` and
+ :attr:`grad_outputs` of double precision. This check will likely fail if
+ they are of less precision, e.g., ``FloatTensor``.
+
+ .. warning::
+ If any checked tensor in :attr:`input` and :attr:`grad_outputs` has
+ overlapping memory, i.e., different indices pointing to the same memory
+ address (e.g., from :func:`torch.expand`), this check will likely fail
+ because the numerical gradients computed by point perturbation at such
+ indices will change values at all other indices that share the same
+ memory address.
+
+ Args:
+ func (function): a Python function that takes Tensor inputs and returns
+ a Tensor or a tuple of Tensors
+ inputs (tuple of Tensor or Tensor): inputs to the function
+ grad_outputs (tuple of Tensor or Tensor, optional): The gradients with
+ respect to the function's outputs.
+ eps (float, optional): perturbation for finite differences
+ atol (float, optional): absolute tolerance
+ rtol (float, optional): relative tolerance
+ gen_non_contig_grad_outputs (bool, optional): if :attr:`grad_outputs` is
+ ``None`` and :attr:`gen_non_contig_grad_outputs` is ``True``, the
+ randomly generated gradient outputs are made to be noncontiguous
+ raise_exception (bool, optional): indicating whether to raise an exception if
+ the check fails. The exception gives more information about the
+ exact nature of the failure. This is helpful when debugging gradchecks.
+ nondet_tol (float, optional): tolerance for non-determinism. When running
+ identical inputs through the differentiation, the results must either match
+ exactly (default, 0.0) or be within this tolerance. Note that a small amount
+ of nondeterminism in the gradient will lead to larger inaccuracies in
+ the second derivative.
+        check_undefined_grad (bool, optional): if True, check if undefined output grads
+ are supported and treated as zeros
+
+ Returns:
+ True if all differences satisfy allclose condition
+ """
+ tupled_inputs = _as_tuple(inputs)
+
+ if grad_outputs is None:
+ # If grad_outputs is not specified, create random Tensors of the same
+ # shape, type, and device as the outputs
+ def randn_like(x):
+ y = torch.testing.randn_like(
+ x if (x.is_floating_point() or x.is_complex()) else x.double(),
+ memory_format=torch.legacy_contiguous_format)
+ if gen_non_contig_grad_outputs:
+ y = torch.testing.make_non_contiguous(y)
+ return y.requires_grad_()
+
+ outputs = _as_tuple(func(*tupled_inputs))
+ tupled_grad_outputs = tuple(randn_like(x) for x in outputs)
+ else:
+ tupled_grad_outputs = _as_tuple(grad_outputs)
+
+ num_outputs = len(tupled_grad_outputs)
+
+ def new_func(*args):
+ input_args = args[:-num_outputs]
+ grad_outputs = args[-num_outputs:]
+ outputs = _differentiable_outputs(func(*input_args))
+ input_args = tuple(x for x in input_args
+ if isinstance(x, torch.Tensor) and x.requires_grad)
+ grad_inputs = torch.autograd.grad(outputs,
+ input_args,
+ grad_outputs,
+ create_graph=True)
+ return grad_inputs
+
+ return gradcheck(new_func,
+ tupled_inputs + tupled_grad_outputs,
+ eps,
+ atol,
+ rtol,
+ raise_exception,
+ nondet_tol=nondet_tol,
+ check_undefined_grad=check_undefined_grad,
+ check_grad_dtypes=check_grad_dtypes)
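+
+
+# Illustrative usage (assumed function and shapes): this modified gradcheck
+# returns the analytical and numerical Jacobians for inspection instead of a
+# plain pass/fail flag.
+#
+#   x = torch.randn(4, 3, dtype=torch.double, requires_grad=True)
+#   analytical, numerical = gradcheck(lambda t: (t * t).sum(-1), (x,), eps=1e-6)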
diff --git a/third_party/dpvo_ext/lietorch/group_ops.py b/third_party/dpvo_ext/lietorch/group_ops.py
new file mode 100644
index 0000000..0f19318
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/group_ops.py
@@ -0,0 +1,107 @@
+import lietorch_backends
+import torch
+import torch.nn.functional as F
+
+
+class GroupOp(torch.autograd.Function):
+ """group operation base class."""
+ @classmethod
+ def forward(cls, ctx, group_id, *inputs):
+ ctx.group_id = group_id
+ ctx.save_for_backward(*inputs)
+ out = cls.forward_op(ctx.group_id, *inputs)
+ return out
+
+ @classmethod
+ def backward(cls, ctx, grad):
+ error_str = 'Backward operation not implemented for {}'.format(cls)
+ assert cls.backward_op is not None, error_str
+
+ inputs = ctx.saved_tensors
+ grad = grad.contiguous()
+ grad_inputs = cls.backward_op(ctx.group_id, grad, *inputs)
+ return (None, ) + tuple(grad_inputs)
+
+
+class Exp(GroupOp):
+ """exponential map."""
+ forward_op, backward_op = lietorch_backends.expm, lietorch_backends.expm_backward
+
+
+class Log(GroupOp):
+ """logarithm map."""
+ forward_op, backward_op = lietorch_backends.logm, lietorch_backends.logm_backward
+
+
+class Inv(GroupOp):
+ """group inverse."""
+ forward_op, backward_op = lietorch_backends.inv, lietorch_backends.inv_backward
+
+
+class Mul(GroupOp):
+ """group multiplication."""
+ forward_op, backward_op = lietorch_backends.mul, lietorch_backends.mul_backward
+
+
+class Adj(GroupOp):
+ """adjoint operator."""
+ forward_op, backward_op = lietorch_backends.adj, lietorch_backends.adj_backward
+
+
+class AdjT(GroupOp):
+ """adjoint operator."""
+ forward_op, backward_op = lietorch_backends.adjT, lietorch_backends.adjT_backward
+
+
+class Act3(GroupOp):
+ """action on point."""
+ forward_op, backward_op = lietorch_backends.act, lietorch_backends.act_backward
+
+
+class Act4(GroupOp):
+ """action on point."""
+ forward_op, backward_op = lietorch_backends.act4, lietorch_backends.act4_backward
+
+
+class Jinv(GroupOp):
+ """adjoint operator."""
+ forward_op, backward_op = lietorch_backends.Jinv, None
+
+
+class ToMatrix(GroupOp):
+ """convert to matrix representation."""
+ forward_op, backward_op = lietorch_backends.as_matrix, None
+
+
+### conversion operations to/from Euclidean embeddings ###
+
+
+class FromVec(torch.autograd.Function):
+ """convert vector into group object."""
+ @classmethod
+ def forward(cls, ctx, group_id, *inputs):
+ ctx.group_id = group_id
+ ctx.save_for_backward(*inputs)
+ return inputs[0]
+
+ @classmethod
+ def backward(cls, ctx, grad):
+ inputs = ctx.saved_tensors
+ J = lietorch_backends.projector(ctx.group_id, *inputs)
+ return None, torch.matmul(grad.unsqueeze(-2),
+ torch.linalg.pinv(J)).squeeze(-2)
+
+
+class ToVec(torch.autograd.Function):
+ """convert group object to vector."""
+ @classmethod
+ def forward(cls, ctx, group_id, *inputs):
+ ctx.group_id = group_id
+ ctx.save_for_backward(*inputs)
+ return inputs[0]
+
+ @classmethod
+ def backward(cls, ctx, grad):
+ inputs = ctx.saved_tensors
+ J = lietorch_backends.projector(ctx.group_id, *inputs)
+ return None, torch.matmul(grad.unsqueeze(-2), J).squeeze(-2)
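+
+
+# Ops are normally invoked through LieGroup.apply_op, but can be called directly
+# via autograd, e.g. (illustrative; group_id 3 corresponds to SE3 in groups.py):
+#
+#   phi = torch.randn(10, 6, device='cuda', requires_grad=True)
+#   X_data = Exp.apply(3, phi)   # 7-dim embedding [t, q] per element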
diff --git a/third_party/dpvo_ext/lietorch/groups.py b/third_party/dpvo_ext/lietorch/groups.py
new file mode 100644
index 0000000..bca42c0
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/groups.py
@@ -0,0 +1,328 @@
+import numpy as np
+import torch
+
+from .broadcasting import broadcast_inputs
+# group operations implemented in cuda
+from .group_ops import (Act3, Act4, Adj, AdjT, Exp, FromVec, Inv, Jinv, Log,
+ Mul, ToMatrix, ToVec)
+
+
+class LieGroupParameter(torch.Tensor):
+ """Wrapper class for LieGroup."""
+
+ from torch._C import _disabled_torch_function_impl
+ __torch_function__ = _disabled_torch_function_impl
+
+ def __new__(cls, group, requires_grad=True):
+ data = torch.zeros(group.tangent_shape,
+ device=group.data.device,
+ dtype=group.data.dtype,
+ requires_grad=True)
+
+ return torch.Tensor._make_subclass(cls, data, requires_grad)
+
+ def __init__(self, group):
+ self.group = group
+
+ def retr(self):
+ return self.group.retr(self)
+
+ def log(self):
+ return self.retr().log()
+
+ def inv(self):
+ return self.retr().inv()
+
+ def adj(self, a):
+ return self.retr().adj(a)
+
+ def __mul__(self, other):
+ if isinstance(other, LieGroupParameter):
+ return self.retr() * other.retr()
+ else:
+ return self.retr() * other
+
+ def add_(self, update, alpha):
+ self.group = self.group.exp(alpha * update) * self.group
+
+ def __getitem__(self, index):
+ return self.retr().__getitem__(index)
+
+
+class LieGroup:
+ """Base class for Lie Group."""
+ def __init__(self, data):
+ self.data = data
+
+ def __repr__(self):
+ return '{}: size={}, device={}, dtype={}'.format(
+ self.group_name, self.shape, self.device, self.dtype)
+
+ @property
+ def shape(self):
+ return self.data.shape[:-1]
+
+ @property
+ def device(self):
+ return self.data.device
+
+ @property
+ def dtype(self):
+ return self.data.dtype
+
+ def vec(self):
+ return self.apply_op(ToVec, self.data)
+
+ @property
+ def tangent_shape(self):
+ return self.data.shape[:-1] + (self.manifold_dim, )
+
+ @classmethod
+ def Identity(cls, *batch_shape, **kwargs):
+ """Construct identity element with batch shape."""
+
+ if isinstance(batch_shape[0], tuple):
+ batch_shape = batch_shape[0]
+
+ elif isinstance(batch_shape[0], list):
+ batch_shape = tuple(batch_shape[0])
+
+ numel = np.prod(batch_shape)
+ data = cls.id_elem.reshape(1, -1)
+
+ if 'device' in kwargs:
+ data = data.to(kwargs['device'])
+
+ if 'dtype' in kwargs:
+ data = data.type(kwargs['dtype'])
+
+ data = data.repeat(numel, 1)
+ return cls(data).view(batch_shape)
+
+ @classmethod
+ def IdentityLike(cls, G):
+ return cls.Identity(G.shape, device=G.data.device, dtype=G.data.dtype)
+
+ @classmethod
+ def InitFromVec(cls, data):
+ return cls(cls.apply_op(FromVec, data))
+
+ @classmethod
+ def Random(cls, *batch_shape, sigma=1.0, **kwargs):
+ """Construct random element with batch_shape by random sampling in
+ tangent space."""
+
+ if isinstance(batch_shape[0], tuple):
+ batch_shape = batch_shape[0]
+
+ elif isinstance(batch_shape[0], list):
+ batch_shape = tuple(batch_shape[0])
+
+ tangent_shape = batch_shape + (cls.manifold_dim, )
+ xi = torch.randn(tangent_shape, **kwargs)
+ return cls.exp(sigma * xi)
+
+ @classmethod
+ def apply_op(cls, op, x, y=None):
+ """Apply group operator."""
+ inputs, out_shape = broadcast_inputs(x, y)
+
+ data = op.apply(cls.group_id, *inputs)
+ return data.view(out_shape + (-1, ))
+
+ @classmethod
+ def exp(cls, x):
+ """exponential map: x -> X."""
+ return cls(cls.apply_op(Exp, x))
+
+ def quaternion(self):
+ """extract quaternion."""
+ return self.apply_op(Quat, self.data)
+
+ def log(self):
+ """logarithm map."""
+ return self.apply_op(Log, self.data)
+
+ def inv(self):
+ """group inverse."""
+ return self.__class__(self.apply_op(Inv, self.data))
+
+ def mul(self, other):
+ """group multiplication."""
+ return self.__class__(self.apply_op(Mul, self.data, other.data))
+
+ def retr(self, a):
+ """ retraction: Exp(a) * X """
+ dX = self.__class__.apply_op(Exp, a)
+ return self.__class__(self.apply_op(Mul, dX, self.data))
+
+ def adj(self, a):
+ """ adjoint operator: b = A(X) * a """
+ return self.apply_op(Adj, self.data, a)
+
+ def adjT(self, a):
+ """ transposed adjoint operator: b = a * A(X) """
+ return self.apply_op(AdjT, self.data, a)
+
+ def Jinv(self, a):
+ return self.apply_op(Jinv, self.data, a)
+
+ def act(self, p):
+ """action on a point cloud."""
+
+ # action on point
+ if p.shape[-1] == 3:
+ return self.apply_op(Act3, self.data, p)
+
+ # action on homogeneous point
+ elif p.shape[-1] == 4:
+ return self.apply_op(Act4, self.data, p)
+
+ def matrix(self):
+ """convert element to 4x4 matrix."""
+ I = torch.eye(4, dtype=self.dtype, device=self.device)
+ I = I.view([1] * (len(self.data.shape) - 1) + [4, 4])
+ return self.__class__(self.data[..., None, :]).act(I).transpose(-1, -2)
+
+ def translation(self):
+ """extract translation component."""
+ p = torch.as_tensor([0.0, 0.0, 0.0, 1.0],
+ dtype=self.dtype,
+ device=self.device)
+ p = p.view([1] * (len(self.data.shape) - 1) + [
+ 4,
+ ])
+ return self.apply_op(Act4, self.data, p)
+
+ def detach(self):
+ return self.__class__(self.data.detach())
+
+ def view(self, dims):
+ data_reshaped = self.data.view(dims + (self.embedded_dim, ))
+ return self.__class__(data_reshaped)
+
+ def __mul__(self, other):
+ # group multiplication
+
+ if isinstance(other, LieGroup):
+ return self.mul(other)
+
+ # action on point
+ elif isinstance(other, torch.Tensor):
+ return self.act(other)
+
+ def __getitem__(self, index):
+ return self.__class__(self.data[index])
+
+ def __setitem__(self, index, item):
+ self.data[index] = item.data
+
+ def to(self, *args, **kwargs):
+ return self.__class__(self.data.to(*args, **kwargs))
+
+ def cpu(self):
+ return self.__class__(self.data.cpu())
+
+ def cuda(self):
+ return self.__class__(self.data.cuda())
+
+ def float(self, device):
+ return self.__class__(self.data.float())
+
+ def double(self, device):
+ return self.__class__(self.data.double())
+
+ def unbind(self, dim=0):
+ return [self.__class__(x) for x in self.data.unbind(dim=dim)]
+
+
+class SO3(LieGroup):
+ group_name = 'SO3'
+ group_id = 1
+ manifold_dim = 3
+ embedded_dim = 4
+
+ # unit quaternion
+ id_elem = torch.as_tensor([0.0, 0.0, 0.0, 1.0])
+
+ def __init__(self, data):
+ if isinstance(data, SE3):
+ data = data.data[..., 3:7]
+
+ super(SO3, self).__init__(data)
+
+
+class RxSO3(LieGroup):
+ group_name = 'RxSO3'
+ group_id = 2
+ manifold_dim = 4
+ embedded_dim = 5
+
+ # unit quaternion
+ id_elem = torch.as_tensor([0.0, 0.0, 0.0, 1.0, 1.0])
+
+ def __init__(self, data):
+ if isinstance(data, Sim3):
+ data = data.data[..., 3:8]
+
+ super(RxSO3, self).__init__(data)
+
+
+class SE3(LieGroup):
+ group_name = 'SE3'
+ group_id = 3
+ manifold_dim = 6
+ embedded_dim = 7
+
+ # translation, unit quaternion
+ id_elem = torch.as_tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
+
+ def __init__(self, data):
+ if isinstance(data, SO3):
+ translation = torch.zeros_like(data.data[..., :3])
+ data = torch.cat([translation, data.data], -1)
+
+ super(SE3, self).__init__(data)
+
+ def scale(self, s):
+ t, q = self.data.split([3, 4], -1)
+ t = t * s.unsqueeze(-1)
+ return SE3(torch.cat([t, q], dim=-1))
+
+
+class Sim3(LieGroup):
+ group_name = 'Sim3'
+ group_id = 4
+ manifold_dim = 7
+ embedded_dim = 8
+
+ # translation, unit quaternion, scale
+ id_elem = torch.as_tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0])
+
+ def __init__(self, data):
+
+ if isinstance(data, SO3):
+            scale = torch.ones_like(data.data[..., :1])
+            translation = torch.zeros_like(data.data[..., :3])
+            data = torch.cat([translation, data.data, scale], -1)
+
+ elif isinstance(data, SE3):
+ scale = torch.ones_like(data.data[..., :1])
+ data = torch.cat([data.data, scale], -1)
+
+ elif isinstance(data, Sim3):
+ data = data.data
+
+ super(Sim3, self).__init__(data)
+
+
+def cat(group_list, dim):
+ """Concatenate groups along dimension."""
+ data = torch.cat([X.data for X in group_list], dim=dim)
+ return group_list[0].__class__(data)
+
+
+def stack(group_list, dim):
+    """Stack groups along a new dimension."""
+ data = torch.stack([X.data for X in group_list], dim=dim)
+ return group_list[0].__class__(data)
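+
+
+# Minimal usage sketch (illustrative only):
+#
+#   xi = torch.randn(8, 6, device='cuda')     # tangent vectors
+#   T = SE3.exp(xi)                           # batch of SE3 elements
+#   p = torch.randn(8, 3, device='cuda')
+#   q = T * p                                 # act on points
+#   dT = (T * T.inv()).log()                  # ~ zero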
diff --git a/third_party/dpvo_ext/lietorch/include/common.h b/third_party/dpvo_ext/lietorch/include/common.h
new file mode 100644
index 0000000..80809ed
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/include/common.h
@@ -0,0 +1,11 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_RUNTIME_NO_MALLOC
+
+#define EPS 1e-6
+#define PI 3.14159265358979323846
+
+
+#endif
diff --git a/third_party/dpvo_ext/lietorch/include/dispatch.h b/third_party/dpvo_ext/lietorch/include/dispatch.h
new file mode 100644
index 0000000..578b292
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/include/dispatch.h
@@ -0,0 +1,47 @@
+#ifndef DISPATCH_H
+#define DISPATCH_H
+
+#include <torch/extension.h>
+
+#include "so3.h"
+#include "rxso3.h"
+#include "se3.h"
+#include "sim3.h"
+
+
+#define PRIVATE_CASE_TYPE(group_index, enum_type, type, ...) \
+ case enum_type: { \
+ using scalar_t = type; \
+ switch (group_index) { \
+ case 1: { \
+ using group_t = SO3; \
+ return __VA_ARGS__(); \
+ } \
+ case 2: { \
+ using group_t = RxSO3; \
+ return __VA_ARGS__(); \
+ } \
+ case 3: { \
+ using group_t = SE3; \
+ return __VA_ARGS__(); \
+ } \
+ case 4: { \
+ using group_t = Sim3; \
+ return __VA_ARGS__(); \
+ } \
+ } \
+ } \
+
+#define DISPATCH_GROUP_AND_FLOATING_TYPES(GROUP_INDEX, TYPE, NAME, ...) \
+ [&] { \
+ const auto& the_type = TYPE; \
+ /* don't use TYPE again in case it is an expensive or side-effect op */ \
+ at::ScalarType _st = ::detail::scalar_type(the_type); \
+ switch (_st) { \
+ PRIVATE_CASE_TYPE(GROUP_INDEX, at::ScalarType::Double, double, __VA_ARGS__) \
+ PRIVATE_CASE_TYPE(GROUP_INDEX, at::ScalarType::Float, float, __VA_ARGS__) \
+ default: break; \
+ } \
+ }()
+
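+// Assumed usage pattern (illustrative, not part of this header): dispatch on the
+// group index and tensor dtype, exposing `group_t` and `scalar_t` to the lambda.
+//
+//   DISPATCH_GROUP_AND_FLOATING_TYPES(group_index, X.type(), "exp_forward", ([&] {
+//     exp_forward_kernel<group_t, scalar_t><<<blocks, threads>>>(...);
+//   }));
+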
+#endif
diff --git a/third_party/dpvo_ext/lietorch/include/lietorch_cpu.h b/third_party/dpvo_ext/lietorch/include/lietorch_cpu.h
new file mode 100644
index 0000000..8510443
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/include/lietorch_cpu.h
@@ -0,0 +1,48 @@
+
+#ifndef LIETORCH_CPU_H_
+#define LIETORCH_CPU_H_
+
+#include <torch/extension.h>
+#include <vector>
+
+
+// unary operations
+torch::Tensor exp_forward_cpu(int, torch::Tensor);
+std::vector<torch::Tensor> exp_backward_cpu(int, torch::Tensor, torch::Tensor);
+
+torch::Tensor log_forward_cpu(int, torch::Tensor);
+std::vector<torch::Tensor> log_backward_cpu(int, torch::Tensor, torch::Tensor);
+
+torch::Tensor inv_forward_cpu(int, torch::Tensor);
+std::vector<torch::Tensor> inv_backward_cpu(int, torch::Tensor, torch::Tensor);
+
+// binary operations
+torch::Tensor mul_forward_cpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> mul_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+torch::Tensor adj_forward_cpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> adj_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+torch::Tensor adjT_forward_cpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> adjT_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+torch::Tensor act_forward_cpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> act_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+torch::Tensor act4_forward_cpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> act4_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+
+// conversion operations
+// std::vector<torch::Tensor> to_vec_backward_cpu(int, torch::Tensor, torch::Tensor);
+// std::vector<torch::Tensor> from_vec_backward_cpu(int, torch::Tensor, torch::Tensor);
+
+// utility operations
+torch::Tensor orthogonal_projector_cpu(int, torch::Tensor);
+
+torch::Tensor as_matrix_forward_cpu(int, torch::Tensor);
+
+torch::Tensor jleft_forward_cpu(int, torch::Tensor, torch::Tensor);
+
+
+#endif
diff --git a/third_party/dpvo_ext/lietorch/include/lietorch_gpu.h b/third_party/dpvo_ext/lietorch/include/lietorch_gpu.h
new file mode 100644
index 0000000..344b629
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/include/lietorch_gpu.h
@@ -0,0 +1,48 @@
+
+#ifndef LIETORCH_GPU_H_
+#define LIETORCH_GPU_H_
+
+#include <torch/extension.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <vector>
+
+
+// unary operations
+torch::Tensor exp_forward_gpu(int, torch::Tensor);
+std::vector<torch::Tensor> exp_backward_gpu(int, torch::Tensor, torch::Tensor);
+
+torch::Tensor log_forward_gpu(int, torch::Tensor);
+std::vector<torch::Tensor> log_backward_gpu(int, torch::Tensor, torch::Tensor);
+
+torch::Tensor inv_forward_gpu(int, torch::Tensor);
+std::vector<torch::Tensor> inv_backward_gpu(int, torch::Tensor, torch::Tensor);
+
+// binary operations
+torch::Tensor mul_forward_gpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> mul_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+torch::Tensor adj_forward_gpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> adj_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+torch::Tensor adjT_forward_gpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> adjT_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+torch::Tensor act_forward_gpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> act_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+torch::Tensor act4_forward_gpu(int, torch::Tensor, torch::Tensor);
+std::vector<torch::Tensor> act4_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
+
+// conversion operations
+// std::vector<torch::Tensor> to_vec_backward_gpu(int, torch::Tensor, torch::Tensor);
+// std::vector<torch::Tensor> from_vec_backward_gpu(int, torch::Tensor, torch::Tensor);
+
+// utility operators
+torch::Tensor orthogonal_projector_gpu(int, torch::Tensor);
+
+torch::Tensor as_matrix_forward_gpu(int, torch::Tensor);
+
+torch::Tensor jleft_forward_gpu(int, torch::Tensor, torch::Tensor);
+
+#endif
diff --git a/third_party/dpvo_ext/lietorch/include/rxso3.h b/third_party/dpvo_ext/lietorch/include/rxso3.h
new file mode 100644
index 0000000..e536472
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/include/rxso3.h
@@ -0,0 +1,322 @@
+
+#ifndef RxSO3_HEADER
+#define RxSO3_HEADER
+
+#include <stdio.h>
+#include <Eigen/Dense>
+#include <Eigen/Geometry>
+
+#include "common.h"
+
+template <typename Scalar>
+class RxSO3 {
+ public:
+ const static int constexpr K = 4; // manifold dimension
+ const static int constexpr N = 5; // embedding dimension
+
+    using Vector3 = Eigen::Matrix<Scalar,3,1>;
+    using Vector4 = Eigen::Matrix<Scalar,4,1>;
+    using Matrix3 = Eigen::Matrix<Scalar,3,3>;
+
+    using Tangent = Eigen::Matrix<Scalar,K,1>;
+    using Data = Eigen::Matrix<Scalar,N,1>;
+
+    using Point = Eigen::Matrix<Scalar,3,1>;
+    using Point4 = Eigen::Matrix<Scalar,4,1>;
+
+    using Quaternion = Eigen::Quaternion<Scalar>;
+    using Transformation = Eigen::Matrix<Scalar,3,3>;
+    using Adjoint = Eigen::Matrix<Scalar,K,K>;
+
+
+ EIGEN_DEVICE_FUNC RxSO3(Quaternion const& q, Scalar const s)
+ : unit_quaternion(q), scale(s) {
+ unit_quaternion.normalize();
+ };
+
+ EIGEN_DEVICE_FUNC RxSO3(const Scalar *data) : unit_quaternion(data), scale(data[4]) {
+ unit_quaternion.normalize();
+ };
+
+ EIGEN_DEVICE_FUNC RxSO3() {
+ unit_quaternion = Quaternion::Identity();
+ scale = Scalar(1.0);
+ }
+
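+ // inverse: conjugate the unit quaternion and take the reciprocal of the scale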
+ EIGEN_DEVICE_FUNC RxSO3 inv() {
+ return RxSO3(unit_quaternion.conjugate(), 1.0/scale);
+ }
+
+ EIGEN_DEVICE_FUNC Data data() const {
+ Data data_vec; data_vec << unit_quaternion.coeffs(), scale;
+ return data_vec;
+ }
+
+ EIGEN_DEVICE_FUNC RxSO3 operator*(RxSO3 const& other) {
+ return RxSO3(unit_quaternion * other.unit_quaternion, scale * other.scale);
+ }
+
+ EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
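+ // rotate p with the unit quaternion (p' = p + 2w(v×p) + 2v×(v×p)), then apply the scale factor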
+ const Quaternion& q = unit_quaternion;
+ Point uv = q.vec().cross(p); uv += uv;
+ return scale * (p + q.w()*uv + q.vec().cross(uv));
+ }
+
+ EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
+ Point4 p1; p1 << this->operator*(p.template segment<3>(0)), p(3);
+ return p1;
+ }
+
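+ // adjoint is block-diagonal: R acts on the rotation part, the scale part is unchanged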
+ EIGEN_DEVICE_FUNC Adjoint Adj() const {
+ Adjoint Ad = Adjoint::Identity();
+ Ad.template block<3,3>(0,0) = unit_quaternion.toRotationMatrix();
+ return Ad;
+ }
+
+ EIGEN_DEVICE_FUNC Transformation Matrix() const {
+ return scale * unit_quaternion.toRotationMatrix();
+ }
+
+ EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,4,4> Matrix4x4() const {
+ Eigen::Matrix<Scalar,4,4> T;
+ T = Eigen::Matrix<Scalar,4,4>::Identity();
+ T.template block<3,3>(0,0) = Matrix();
+ return T;
+ }
+
+ EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,N,N> orthogonal_projector() const {
+ // jacobian action on a point
+ Eigen::Matrix<Scalar,N,N> J = Eigen::Matrix<Scalar,N,N>::Zero();
+
+ J.template block<3,3>(0,0) = 0.5 * (
+ unit_quaternion.w() * Matrix3::Identity() +
+ SO3<Scalar>::hat(-unit_quaternion.vec())
+ );
+
+ J.template block<1,3>(3,0) = 0.5 * (-unit_quaternion.vec());
+
+ // scale
+ J(4,3) = scale;
+
+ return J;
+ }
+
+ EIGEN_DEVICE_FUNC Transformation Rotation() const {
+ return unit_quaternion.toRotationMatrix();
+ }
+
+ EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
+ return Adj() * a;
+ }
+
+ EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
+ return Adj().transpose() * a;
+ }
+
+ EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& phi_sigma) {
+ Vector3 const phi = phi_sigma.template segment<3>(0);
+ // so(3) skew part plus the scale component sigma on the diagonal
+ return SO3<Scalar>::hat(phi) + phi_sigma(3) * Transformation::Identity();
+ }
+
+ EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& phi_sigma) {
+ Vector3 const phi = phi_sigma.template segment<3>(0);
+ Matrix3 const Phi = SO3<Scalar>::hat(phi);
+
+ Adjoint ad = Adjoint::Zero();
+ ad.template block<3,3>(0,0) = Phi;
+
+ return ad;
+ }
+
+ EIGEN_DEVICE_FUNC Tangent Log() const {
+ using std::abs;
+ using std::atan;
+ using std::sqrt;
+
+ Scalar squared_n = unit_quaternion.vec().squaredNorm();
+ Scalar w = unit_quaternion.w();
+ Scalar two_atan_nbyw_by_n;
+
+ /// Atan-based log thanks to
+ ///
+ /// C. Hertzberg et al.:
+ /// "Integrating Generic Sensor Fusion Algorithms with Sound State
+ /// Representation through Encapsulation of Manifolds"
+ /// Information Fusion, 2011
+
+ if (squared_n < EPS * EPS) {
+ two_atan_nbyw_by_n = Scalar(2) / w - Scalar(2.0/3.0) * (squared_n) / (w * w * w);
+ } else {
+ Scalar n = sqrt(squared_n);
+ if (abs(w) < EPS) {
+ if (w > Scalar(0)) {
+ two_atan_nbyw_by_n = PI / n;
+ } else {
+ two_atan_nbyw_by_n = -PI / n;
+ }
+ } else {
+ two_atan_nbyw_by_n = Scalar(2) * atan(n / w) / n;
+ }
+ }
+
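+ // pack the SO(3) log (angle times axis from the quaternion) together with log(scale)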
+ Tangent phi_sigma;
+ phi_sigma << two_atan_nbyw_by_n * unit_quaternion.vec(), log(scale);
+
+ return phi_sigma;
+ }
+
+ EIGEN_DEVICE_FUNC static RxSO3 Exp(Tangent const& phi_sigma) {
+ Vector3 phi = phi_sigma.template segment<3>(0);
+ Scalar scale = exp(phi_sigma(3));
+
+ Scalar theta2 = phi.squaredNorm();
+ Scalar theta = sqrt(theta2);
+ Scalar imag_factor;
+ Scalar real_factor;
+
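+ // near theta = 0, use Taylor expansions of sin(theta/2)/theta and cos(theta/2) to avoid precision loss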
+ if (theta < EPS) {
+ Scalar theta4 = theta2 * theta2;
+ imag_factor = Scalar(0.5) - Scalar(1.0/48.0) * theta2 + Scalar(1.0/3840.0) * theta4;
+ real_factor = Scalar(1) - Scalar(1.0/8.0) * theta2 + Scalar(1.0/384.0) * theta4;
+ } else {
+ imag_factor = sin(.5 * theta) / theta;
+ real_factor = cos(.5 * theta);
+ }
+
+ Quaternion q(real_factor, imag_factor*phi.x(), imag_factor*phi.y(), imag_factor*phi.z());
+ return RxSO3(q, scale);
+ }
+
+ EIGEN_DEVICE_FUNC static Matrix3 calcW(Tangent const& phi_sigma) {
+ // left jacobian
+ using std::abs;
+ Matrix3 const I = Matrix3::Identity();
+ Scalar const one(1);
+ Scalar const half(0.5);
+
+ Vector3 const phi = phi_sigma.template segment<3>(0);
+ Scalar const sigma = phi_sigma(3);
+ Scalar const theta = phi.norm();
+
+ Matrix3 const Phi = SO3<Scalar>::hat(phi);
+ Matrix3 const Phi2 = Phi * Phi;
+ Scalar const scale = exp(sigma);
+
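+ // W = A*Phi + B*Phi^2 + C*I; the coefficients switch to series expansions when sigma or theta is near zero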
+ Scalar A, B, C;
+ if (abs(sigma) < EPS) {
+ C = one;
+ if (abs(theta) < EPS) {
+ A = half;
+ B = Scalar(1. / 6.);
+ } else {
+ Scalar theta_sq = theta * theta;
+ A = (one - cos(theta)) / theta_sq;
+ B = (theta - sin(theta)) / (theta_sq * theta);
+ }
+ } else {
+ C = (scale - one) / sigma;
+ if (abs(theta) < EPS) {
+ Scalar sigma_sq = sigma * sigma;
+ A = ((sigma - one) * scale + one) / sigma_sq;
+ B = (scale * half * sigma_sq + scale - one - sigma * scale) /
+ (sigma_sq * sigma);
+ } else {
+ Scalar theta_sq = theta * theta;
+ Scalar a = scale * sin(theta);
+ Scalar b = scale * cos(theta);
+ Scalar c = theta_sq + sigma * sigma;
+ A = (a * sigma + (one - b) * theta) / (theta * c);
+ B = (C - ((b - one) * sigma + a * theta) / (c)) * one / (theta_sq);
+ }
+ }
+ return A * Phi + B * Phi2 + C * I;
+ }
+
+ EIGEN_DEVICE_FUNC static Matrix3 calcWInv(Tangent const& phi_sigma) {
+ // left jacobian inverse
+ Matrix3 const I = Matrix3::Identity();
+ Scalar const half(0.5);
+ Scalar const one(1);
+ Scalar const two(2);
+
+ Vector3 const phi = phi_sigma.template segment<3>(0);
+ Scalar const sigma = phi_sigma(3);
+ Scalar const theta = phi.norm();
+ Scalar const scale = exp(sigma);
+
+ Matrix3 const Phi = SO3<Scalar>::hat(phi);
+ Matrix3 const Phi2 = Phi * Phi;
+ Scalar const scale_sq = scale * scale;
+ Scalar const theta_sq = theta * theta;
+ Scalar const sin_theta = sin(theta);
+ Scalar const cos_theta = cos(theta);
+
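+ // closed-form coefficients for W^-1 = a*Phi + b*Phi^2 + c*I, with the same small-sigma / small-theta guards as calcW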
+ Scalar a, b, c;
+ if (abs(sigma * sigma) < EPS) {
+ c = one - half * sigma;
+ a = -half;
+ if (abs(theta_sq) < EPS) {
+ b = Scalar(1. / 12.);
+ } else {
+ b = (theta * sin_theta + two * cos_theta - two) /
+ (two * theta_sq * (cos_theta - one));
+ }
+ } else {
+ Scalar const scale_cu = scale_sq * scale;
+ c = sigma / (scale - one);
+ if (abs(theta_sq) < EPS) {
+ a = (-sigma * scale + scale - one) / ((scale - one) * (scale - one));
+ b = (scale_sq * sigma - two * scale_sq + scale * sigma + two * scale) /
+ (two * scale_cu - Scalar(6) * scale_sq + Scalar(6) * scale - two);
+ } else {
+ Scalar const s_sin_theta = scale * sin_theta;
+ Scalar const s_cos_theta = scale * cos_theta;
+ a = (theta * s_cos_theta - theta - sigma * s_sin_theta) /
+ (theta * (scale_sq - two * s_cos_theta + one));
+ b = -scale *
+ (theta * s_sin_theta - theta * sin_theta + sigma * s_cos_theta -
+ scale * sigma + sigma * cos_theta - sigma) /
+ (theta_sq * (scale_cu - two * scale * s_cos_theta - scale_sq +
+ two * s_cos_theta + scale - one));
+ }
+ }
+ return a * Phi + b * Phi2 + c * I;
+ }
+
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& phi_sigma) {
+ // left jacobian
+ Adjoint J = Adjoint::Identity();
+ Vector3 phi = phi_sigma.template segment<3>(0);
+ J.template block<3,3>(0,0) = SO3<Scalar>::left_jacobian(phi);
+ return J;
+ }
+
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& phi_sigma) {
+ // left jacobian inverse
+ Adjoint Jinv = Adjoint::Identity();
+ Vector3 phi = phi_sigma.template segment<3>(0);
+ Jinv.template block<3,3>(0,0) = SO3<Scalar>::left_jacobian_inverse(phi);
+ return Jinv;
+ }
+
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,K> act_jacobian(Point const& p) {
+ // jacobian action on a point
+ Eigen::Matrix<Scalar,3,K> Ja;
+ Ja << SO3<Scalar>::hat(-p), p;
+ return Ja;
+ }
+
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,K> act4_jacobian(Point4 const& p) {
+ // jacobian action on a point
+ Eigen::Matrix<Scalar,4,K> J = Eigen::Matrix<Scalar,4,K>::Zero();
+ J.template block<3,3>(0,0) = SO3<Scalar>::hat(-p.template segment<3>(0));
+ J.template block<3,1>(0,3) = p.template segment<3>(0);
+ return J;
+ }
+
+ private:
+ Quaternion unit_quaternion;
+ Scalar scale;
+};
+
+#endif
diff --git a/third_party/dpvo_ext/lietorch/include/se3.h b/third_party/dpvo_ext/lietorch/include/se3.h
new file mode 100644
index 0000000..c5bca16
--- /dev/null
+++ b/third_party/dpvo_ext/lietorch/include/se3.h
@@ -0,0 +1,228 @@
+
+#ifndef SE3_HEADER
+#define SE3_HEADER
+
+#include <stdio.h>
+#include <Eigen/Dense>
+#include <Eigen/Geometry>
+
+#include "common.h"
+#include "so3.h"
+
+
+template <typename Scalar>
+class SE3 {
+ public:
+ const static int constexpr K = 6; // manifold dimension
+ const static int constexpr N = 7; // embedding dimension
+
+ using Vector3 = Eigen::Matrix<Scalar,3,1>;
+ using Vector4 = Eigen::Matrix<Scalar,4,1>;
+ using Matrix3 = Eigen::Matrix<Scalar,3,3>