From 4b980f1a0db06d8638d637e5c492128f5c0b0b5d Mon Sep 17 00:00:00 2001 From: Jeffrey Date: Wed, 11 Sep 2024 14:42:08 +0800 Subject: [PATCH 1/2] Use bilinear interpolation in refine_edges() A fix in refine_edges(): Use continuous coordinates and bilinear interpolation (instead of discrete coordinates) to fetch grayscale values from the input image. Using discrete coordinates and pixel values, as done in versions before this commit, might incur errors caused by rounding effects, resulting in instability. In this commit, bilinear interpolation is employed instead, which doesn't change the *average output* of refine_edges() but makes the output smoother and more stable. --- apriltag.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/apriltag.c b/apriltag.c index a513cb98..e36b1d99 100644 --- a/apriltag.c +++ b/apriltag.c @@ -789,10 +789,18 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu // search on another pixel in the first place. Likewise, // for very small tags, we don't want the range to be too // big. - double range = td->quad_decimate + 1; + + int range = td->quad_decimate + 1; + + // To reduce the overhead of bilinear interpolation, we can + // reduce the number of steps per unit. + int steps_per_unit = 4; + double step_length = 1.0 / steps_per_unit; + int max_steps = 2 * steps_per_unit * range + 1; // XXX tunable step size. - for (double n = -range; n <= range; n += 0.25) { + for (int step = 0; step < max_steps; ++step) { + double n = -range + step_length * step; // Because of the guaranteed winding order of the // points in the quad, we will start inside the white // portion of the quad and work our way outward. @@ -802,19 +810,36 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu // gradient more precisely, but are more sensitive to // noise. 
double grange = 1; - int x1 = x0 + (n + grange)*nx; - int y1 = y0 + (n + grange)*ny; - if (x1 < 0 || x1 >= im_orig->width || y1 < 0 || y1 >= im_orig->height) - continue; - int x2 = x0 + (n - grange)*nx; - int y2 = y0 + (n - grange)*ny; - if (x2 < 0 || x2 >= im_orig->width || y2 < 0 || y2 >= im_orig->height) + double x1 = x0 + (n + grange)*nx; + double y1 = y0 + (n + grange)*ny; + double x1i_d, y1i_d, a1, b1; + a1 = modf(x1, &x1i_d); + b1 = modf(y1, &y1i_d); + int x1i = x1i_d, y1i = y1i_d; + + if (x1i < 0 || x1i + 1 >= im_orig->width || y1i < 0 || y1i + 1 >= im_orig->height) continue; - int g1 = im_orig->buf[y1*im_orig->stride + x1]; - int g2 = im_orig->buf[y2*im_orig->stride + x2]; + double x2 = x0 + (n - grange)*nx; + double y2 = y0 + (n - grange)*ny; + double x2i_d, y2i_d, a2, b2; + a2 = modf(x2, &x2i_d); + b2 = modf(y2, &y2i_d); + int x2i = x2i_d, y2i = y2i_d; + + if (x2i < 0 || x2i + 1 >= im_orig->width || y2i < 0 || y2i + 1 >= im_orig->height) + continue; + // interpolate + double g1 = (1 - a1) * (1 - b1) * im_orig->buf[y1i*im_orig->stride + x1i] + + a1 * (1 - b1) * im_orig->buf[y1i*im_orig->stride + x1i + 1] + + (1 - a1) * b1 * im_orig->buf[(y1i + 1)*im_orig->stride + x1i] + + a1 * b1 * im_orig->buf[(y1i + 1)*im_orig->stride + x1i + 1]; + double g2 = (1 - a2) * (1 - b2) * im_orig->buf[y2i*im_orig->stride + x2i] + + a2 * (1 - b2) * im_orig->buf[y2i*im_orig->stride + x2i + 1] + + (1 - a2) * b2 * im_orig->buf[(y2i + 1)*im_orig->stride + x2i] + + a2 * b2 * im_orig->buf[(y2i + 1)*im_orig->stride + x2i + 1]; if (g1 < g2) // reject points whose gradient is "backwards". They can only hurt us. continue; From cb522aec14941f284e022a2a80f2fa5eda6f6fa3 Mon Sep 17 00:00:00 2001 From: Jeffrey Date: Wed, 11 Sep 2024 14:53:23 +0800 Subject: [PATCH 2/2] Fix the inconsistent image coordinate conventions. The convention that (0,0) be the left top corner of the first pixel is adopted in this commit. 
In previous versions, the coordinate convention used for `quad_decimate = 1, refine_edges = false` was already `left-top-corner = (0,0)`; for `quad_decimate > 1` or `refine_edges = true`, however, the convention was vague: mixed conventions appear to have been used, and some of the code may even have been wrong (see https://github.com/AprilRobotics/apriltag/issues/345). This fix ensures that the same convention is used regardless of the values of `quad_decimate` and `refine_edges`. --- apriltag.c | 18 +++++++----------- apriltag_detect.docstring | 4 +++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/apriltag.c b/apriltag.c index e36b1d99..275a4aa6 100644 --- a/apriltag.c +++ b/apriltag.c @@ -797,6 +797,7 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu int steps_per_unit = 4; double step_length = 1.0 / steps_per_unit; int max_steps = 2 * steps_per_unit * range + 1; + double delta = 0.5; // XXX tunable step size. for (int step = 0; step < max_steps; ++step) { @@ -811,8 +812,8 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu // noise. 
double grange = 1; - double x1 = x0 + (n + grange)*nx; - double y1 = y0 + (n + grange)*ny; + double x1 = x0 + (n + grange)*nx - delta; + double y1 = y0 + (n + grange)*ny - delta; double x1i_d, y1i_d, a1, b1; a1 = modf(x1, &x1i_d); b1 = modf(y1, &y1i_d); @@ -821,8 +822,8 @@ static void refine_edges(apriltag_detector_t *td, image_u8_t *im_orig, struct qu if (x1i < 0 || x1i + 1 >= im_orig->width || y1i < 0 || y1i + 1 >= im_orig->height) continue; - double x2 = x0 + (n - grange)*nx; - double y2 = y0 + (n - grange)*ny; + double x2 = x0 + (n - grange)*nx - delta; + double y2 = y0 + (n - grange)*ny - delta; double x2i_d, y2i_d, a2, b2; a2 = modf(x2, &x2i_d); b2 = modf(y2, &y2i_d); @@ -1115,13 +1116,8 @@ zarray_t *apriltag_detector_detect(apriltag_detector_t *td, image_u8_t *im_orig) zarray_get_volatile(quads, i, &q); for (int j = 0; j < 4; j++) { - if (td->quad_decimate == 1.5) { - q->p[j][0] *= td->quad_decimate; - q->p[j][1] *= td->quad_decimate; - } else { - q->p[j][0] = (q->p[j][0] - 0.5)*td->quad_decimate + 0.5; - q->p[j][1] = (q->p[j][1] - 0.5)*td->quad_decimate + 0.5; - } + q->p[j][0] *= td->quad_decimate; + q->p[j][1] *= td->quad_decimate; } } } diff --git a/apriltag_detect.docstring b/apriltag_detect.docstring index 0f9bca59..6f455f09 100644 --- a/apriltag_detect.docstring +++ b/apriltag_detect.docstring @@ -42,7 +42,9 @@ a tuple containing the detections. Each detection is a dict with keys: - id: integer identifying each detected tag -- center: pixel coordinates of the center of each detection +- center: pixel coordinates of the center of each detection. NOTE: Please be + cautious regarding the image coordinate convention. Here, we define (0,0) as + the left-top corner (not the center point) of the left-top-most pixel. - lb-rb-rt-lt: pixel coordinates of the 4 corners of each detection. The order is left-bottom, right-bottom, right-top, left-top