From 4b980f1a0db06d8638d637e5c492128f5c0b0b5d Mon Sep 17 00:00:00 2001
From: Jeffrey <jeffrey@newthinker.net>
Date: Wed, 11 Sep 2024 14:42:08 +0800
Subject: [PATCH 1/2] Use bilinear interpolation in refine_edges()

A fix in refine_edges(): Use continuous coordinates
and bilinear interpolation (instead of discrete
coordinates) to fetch grayscale values from the input
image.

Using discrete coordinates and pixel values, as
previous versions did, can introduce rounding errors
that make the refined edges unstable. This commit
uses bilinear interpolation instead, which doesn't
change the *average output* of refine_edges(), but
makes the output smoother and more stable.
---
 apriltag.c | 47 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 11 deletions(-)

diff --git a/apriltag.c b/apriltag.c
index a513cb98..e36b1d99 100644
--- a/apriltag.c
+++ b/apriltag.c
@@ -789,10 +789,18 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu
             // search on another pixel in the first place. Likewise,
             // for very small tags, we don't want the range to be too
             // big.
-            double range = td->quad_decimate + 1;
+
+            int range = td->quad_decimate + 1;
+
+            // To reduce the overhead of bilinear interpolation, we can
+            // reduce the number of steps per unit.
+            int steps_per_unit = 4;
+            double step_length = 1.0 / steps_per_unit;
+            int max_steps = 2 * steps_per_unit * range + 1;
 
             // XXX tunable step size.
-            for (double n = -range; n <= range; n +=  0.25) {
+            for (int step = 0; step < max_steps; ++step) {
+                double n = -range + step_length * step;
                 // Because of the guaranteed winding order of the
                 // points in the quad, we will start inside the white
                 // portion of the quad and work our way outward.
@@ -802,19 +810,36 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu
                 // gradient more precisely, but are more sensitive to
                 // noise.
                 double grange = 1;
-                int x1 = x0 + (n + grange)*nx;
-                int y1 = y0 + (n + grange)*ny;
-                if (x1 < 0 || x1 >= im_orig->width || y1 < 0 || y1 >= im_orig->height)
-                    continue;
 
-                int x2 = x0 + (n - grange)*nx;
-                int y2 = y0 + (n - grange)*ny;
-                if (x2 < 0 || x2 >= im_orig->width || y2 < 0 || y2 >= im_orig->height)
+                double x1 = x0 + (n + grange)*nx;
+                double y1 = y0 + (n + grange)*ny;
+                double x1i_d, y1i_d, a1, b1;
+                a1 = modf(x1, &x1i_d);
+                b1 = modf(y1, &y1i_d);
+                int x1i = x1i_d, y1i = y1i_d;
+
+                if (x1i < 0 || x1i + 1 >= im_orig->width || y1i < 0 || y1i + 1 >= im_orig->height)
                     continue;
 
-                int g1 = im_orig->buf[y1*im_orig->stride + x1];
-                int g2 = im_orig->buf[y2*im_orig->stride + x2];
+                double x2 = x0 + (n - grange)*nx;
+                double y2 = y0 + (n - grange)*ny;
+                double x2i_d, y2i_d, a2, b2;
+                a2 = modf(x2, &x2i_d);
+                b2 = modf(y2, &y2i_d);
+                int x2i = x2i_d, y2i = y2i_d;
+
+                if (x2i < 0 || x2i + 1 >= im_orig->width || y2i < 0 || y2i + 1 >= im_orig->height)
+                    continue;
 
+                // interpolate
+                double g1 = (1 - a1) * (1 - b1) * im_orig->buf[y1i*im_orig->stride + x1i] +
+                                  a1 * (1 - b1) * im_orig->buf[y1i*im_orig->stride + x1i + 1] +
+                            (1 - a1) *    b1    * im_orig->buf[(y1i + 1)*im_orig->stride + x1i] +
+                                  a1 *    b1    * im_orig->buf[(y1i + 1)*im_orig->stride + x1i + 1];
+                double g2 = (1 - a2) * (1 - b2) * im_orig->buf[y2i*im_orig->stride + x2i] +
+                                  a2 * (1 - b2) * im_orig->buf[y2i*im_orig->stride + x2i + 1] +
+                            (1 - a2) *    b2    * im_orig->buf[(y2i + 1)*im_orig->stride + x2i] +
+                                  a2 *    b2    * im_orig->buf[(y2i + 1)*im_orig->stride + x2i + 1];
                 if (g1 < g2) // reject points whose gradient is "backwards". They can only hurt us.
                     continue;
 

From cb522aec14941f284e022a2a80f2fa5eda6f6fa3 Mon Sep 17 00:00:00 2001
From: Jeffrey <jeffrey@newthinker.net>
Date: Wed, 11 Sep 2024 14:53:23 +0800
Subject: [PATCH 2/2] Fix the inconsistent image coordinate conventions.

This commit adopts the convention that (0,0) is the
left-top corner of the first (left-top-most) pixel.

In previous versions, the coordinate convention
used for `quad_decimate = 1, refine_edges = false` was
already `left-top-corner = (0,0)`,
while for `quad_decimate > 1` or `refine_edges = true`
the convention was unclear: mixed conventions appear
to have been used, and some of the code was even wrong (see
https://github.com/AprilRobotics/apriltag/issues/345).

This fix ensures that the same convention is used
regardless of the values of `quad_decimate` and `refine_edges`.
---
 apriltag.c                | 18 +++++++-----------
 apriltag_detect.docstring |  4 +++-
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/apriltag.c b/apriltag.c
index e36b1d99..275a4aa6 100644
--- a/apriltag.c
+++ b/apriltag.c
@@ -797,6 +797,7 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu
             int steps_per_unit = 4;
             double step_length = 1.0 / steps_per_unit;
             int max_steps = 2 * steps_per_unit * range + 1;
+            double delta = 0.5;
 
             // XXX tunable step size.
             for (int step = 0; step < max_steps; ++step) {
@@ -811,8 +812,8 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu
                 // noise.
                 double grange = 1;
 
-                double x1 = x0 + (n + grange)*nx;
-                double y1 = y0 + (n + grange)*ny;
+                double x1 = x0 + (n + grange)*nx - delta;
+                double y1 = y0 + (n + grange)*ny - delta;
                 double x1i_d, y1i_d, a1, b1;
                 a1 = modf(x1, &x1i_d);
                 b1 = modf(y1, &y1i_d);
@@ -821,8 +822,8 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu
                 if (x1i < 0 || x1i + 1 >= im_orig->width || y1i < 0 || y1i + 1 >= im_orig->height)
                     continue;
 
-                double x2 = x0 + (n - grange)*nx;
-                double y2 = y0 + (n - grange)*ny;
+                double x2 = x0 + (n - grange)*nx - delta;
+                double y2 = y0 + (n - grange)*ny - delta;
                 double x2i_d, y2i_d, a2, b2;
                 a2 = modf(x2, &x2i_d);
                 b2 = modf(y2, &y2i_d);
@@ -1115,13 +1116,8 @@ zarray_t *apriltag_detector_detect(apriltag_detector_t *td, image_u8_t *im_orig)
             zarray_get_volatile(quads, i, &q);
 
             for (int j = 0; j < 4; j++) {
-                if (td->quad_decimate == 1.5) {
-                    q->p[j][0] *= td->quad_decimate;
-                    q->p[j][1] *= td->quad_decimate;
-                } else {
-                    q->p[j][0] = (q->p[j][0] - 0.5)*td->quad_decimate + 0.5;
-                    q->p[j][1] = (q->p[j][1] - 0.5)*td->quad_decimate + 0.5;
-                }
+                q->p[j][0] *= td->quad_decimate;
+                q->p[j][1] *= td->quad_decimate;
             }
         }
     }
diff --git a/apriltag_detect.docstring b/apriltag_detect.docstring
index 0f9bca59..6f455f09 100644
--- a/apriltag_detect.docstring
+++ b/apriltag_detect.docstring
@@ -42,7 +42,9 @@ a tuple containing the detections. Each detection is a dict with keys:
 
 - id: integer identifying each detected tag
 
-- center: pixel coordinates of the center of each detection
+- center: pixel coordinates of the center of each detection.  NOTE: Please be
+  cautious regarding the image coordinate convention. Here, we define (0,0) as
+  the left-top corner (not the center point) of the left-top-most pixel.
 
 - lb-rb-rt-lt: pixel coordinates of the 4 corners of each detection. The order
   is left-bottom, right-bottom, right-top, left-top