Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up and test drop rate logic #120

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ C = $(wildcard *.c) $(wildcard *.cpp)
INCLUDES = -I/usr/local/include -I.
LIBS = -L/usr/local/lib

tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o pmtiles_file.o plugin.o read_json.o write_json.o geobuf.o flatgeobuf.o evaluator.o geocsv.o csv.o geojson-loop.o json_logger.o visvalingam.o compression.o
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o drop.o text.o dirtiles.o pmtiles_file.o plugin.o read_json.o write_json.o geobuf.o flatgeobuf.o evaluator.o geocsv.o csv.o geojson-loop.o json_logger.o visvalingam.o compression.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread

tippecanoe-enumerate: enumerate.o
Expand All @@ -72,7 +72,7 @@ tile-join: tile-join.o projection.o mbtiles.o mvt.o memfile.o dirtiles.o jsonpul
tippecanoe-json-tool: jsontool.o jsonpull/jsonpull.o csv.o text.o geojson-loop.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread

unit: unit.o text.o
unit: unit.o text.o drop.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread

-include $(wildcard *.d)
Expand Down
87 changes: 87 additions & 0 deletions drop.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include <cmath>
#include "drop.hpp"
#include "options.hpp"
#include "geometry.hpp"

unsigned long long preserve_point_density_threshold = 0;

int calc_feature_minzoom(struct index *ix, struct drop_state ds[], int maxzoom, double gamma) {
int feature_minzoom = 0;

if (gamma >= 0 && (ix->t == VT_POINT ||
(additional[A_LINE_DROP] && ix->t == VT_LINE) ||
(additional[A_POLYGON_DROP] && ix->t == VT_POLYGON))) {
for (ssize_t i = 0; i <= maxzoom; i++) {
// This zoom level is now lighter on features
ds[i].error -= 1.0;
}

ssize_t chosen = maxzoom + 1;
for (ssize_t i = 0; i <= maxzoom; i++) {
if (ds[i].error < 0) {
// this zoom level is too light, so it is time to emit a feature.
feature_minzoom = i;

// this feature now appears in this zoom level and all higher zoom levels,
// so each of them has this feature as its last feature, and each of them
// is now one feature (which has a weight of `interval`) heavier than before.
for (ssize_t j = i; j <= maxzoom; j++) {
ds[j].previndex = ix->ix;
ds[j].error += ds[j].interval;
}

chosen = i;
break;
}
}

// If this feature has been chosen only for a high zoom level,
// check whether at a low zoom level it is nevertheless too far
// from the last feature chosen for that low zoom, in which case
// we will go ahead and push it out.

if (preserve_point_density_threshold > 0) {
for (ssize_t i = 0; i < chosen && i < maxzoom; i++) {
if (ix->ix - ds[i].previndex > ((1LL << (32 - i)) / preserve_point_density_threshold) * ((1LL << (32 - i)) / preserve_point_density_threshold)) {
feature_minzoom = i;

// this feature now appears in this zoom level and all higher zoom levels
// (below `chosen`, beyond which were already credited with this feature)
// so each of them has this feature as its last feature, and each of them
// is now one feature (which has a weight of `interval`) heavier than before.
for (ssize_t j = i; j < chosen; j++) {
ds[j].previndex = ix->ix;
ds[j].error += ds[j].interval;
}

break;
}
}
}
}

return feature_minzoom;
}

void prep_drop_states(struct drop_state ds[], int maxzoom, int basezoom, double droprate) {
// Needs to be signed for interval calculation
for (ssize_t i = 0; i <= maxzoom; i++) {
ds[i].previndex = 0;
ds[i].interval = 1; // every feature appears in every zoom level at or above the basezoom

if (i < basezoom) {
// at zoom levels below the basezoom, the fraction of points that are dropped is
// the drop rate to the power of the number of zooms this zoom is below the basezoom
//
// for example:
// basezoom: 1 (droprate ^ 0)
// basezoom - 1: 2.5 (droprate ^ 1)
// basezoom - 2: 6.25 (droprate ^ 2)
// ...
// basezoom - n: (droprate ^ n)
ds[i].interval = std::pow(droprate, basezoom - i);
}

ds[i].error = 0;
}
}
65 changes: 65 additions & 0 deletions drop.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#ifndef DROP_HPP
#define DROP_HPP

// As features are read during the input phase, each one is represented by
// an index entry giving its geometry type, its spatial index, and the location
// in the geometry file of the rest of its data.
//
// Note that the fields are in a specific order so that `segment` and `t` will be
// packed together with `seq` so that the total structure size will be only 32 bytes
// instead of 40. (Could we save a few more, perhaps, by tracking `len` instead of
// `end` and limiting the size of individual features to 2^32 bytes?)

struct index {
// first and last+1 byte of the feature in the geometry temp file
long long start = 0;
long long end = 0;

// z-index or hilbert index of the feature
unsigned long long ix = 0;

// which thread's geometry temp file this feature is in
short segment = 0;

// geometry type (a two-bit field; calc_feature_minzoom compares it
// against VT_POINT, VT_LINE, and VT_POLYGON)
unsigned short t : 2;

// sequence number (sometimes with gaps in numbering) of the feature in the original input file
// (a 46-bit field: 64 bits less the 16 of `segment` and the 2 of `t`)
unsigned long long seq : (64 - 16 - 2); // pack with segment and t to stay in 32 bytes

// The two bit-fields are zeroed here, in the constructor's initializer list;
// the other members get their zero values from their default member initializers.
index()
: t(0),
seq(0) {
}
};

// Each zoom level has a drop_state that is used to account for the fraction of
// point features that are supposed to be dropped in that zoom level. As it goes
// through the spatially-sorted features, it is basically doing a diffusion dither
// to keep the density of features in each vicinity at each zoom level
// approximately correct by including or excluding individual features
// to maintain the balance.

struct drop_state {
// the z-index or hilbert index of the last feature that was placed in this zoom level
unsigned long long previndex;

// the preservation interval (1 or more) for features in this zoom level.
// 1 would be to keep all the features; 2 would keep every other feature;
// 4 would keep every fourth feature, and so on.
double interval;

// the current accumulated error in this zoom level.
// calc_feature_minzoom subtracts 1 from it for each droppable feature it sees,
// and adds `interval` to it each time it places a feature in this zoom level, so:
// negative means the zoom level is too light (too many features have been dropped),
// and the next feature will be placed in it;
// zero or positive means the zoom level has its share of features for now,
// and features are left out of it until the error goes negative again.
//
// this is floating-point because the interval is.
double error;
};

extern unsigned long long preserve_point_density_threshold;

int calc_feature_minzoom(struct index *ix, struct drop_state ds[], int maxzoom, double gamma);
void prep_drop_states(struct drop_state ds[], int maxzoom, int basezoom, double droprate);

#endif
85 changes: 6 additions & 79 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
#include "text.hpp"
#include "errors.hpp"
#include "read_json.hpp"
#include "drop.hpp"

static int low_detail = 12;
static int full_detail = -1;
Expand All @@ -90,7 +91,6 @@ size_t limit_tile_feature_count = 0;
size_t limit_tile_feature_count_at_maxzoom = 0;
unsigned int drop_denser = 0;
std::map<std::string, serial_val> set_attributes;
unsigned long long preserve_point_density_threshold = 0;

std::vector<order_field> order_by;
bool order_reverse;
Expand Down Expand Up @@ -274,13 +274,6 @@ static void insert(struct mergelist *m, struct mergelist **head, unsigned char *
*head = m;
}

// Per-zoom-level bookkeeping used by calc_feature_minzoom to decide
// which features to drop at that zoom level.
struct drop_state {
// set to 0 by prep_drop_states
double gap;
// `ix` of the last feature that calc_feature_minzoom pushed out at this zoom level
unsigned long long previndex;
// amount subtracted from `seq` for each feature that is not pushed out at this zoom level;
// prep_drop_states sets it to 0 at or above the basezoom
double interval;
// running counter: incremented for every droppable feature, reduced by `interval` as above
double seq; // floating point because interval is
};

struct drop_densest {
unsigned long long gap;
size_t seq;
Expand All @@ -291,61 +284,6 @@ struct drop_densest {
}
};

// Returns the minimum zoom level chosen for the feature described by `ix`,
// updating the per-zoom state in `ds` (entries 0 through `maxzoom`) as it goes.
// Returns 0 without touching `ds` if `gamma` is negative, or if the feature is
// a line or polygon and the corresponding drop option is not set.
int calc_feature_minzoom(struct index *ix, struct drop_state *ds, int maxzoom, double gamma) {
int feature_minzoom = 0;

if (gamma >= 0 && (ix->t == VT_POINT ||
(additional[A_LINE_DROP] && ix->t == VT_LINE) ||
(additional[A_POLYGON_DROP] && ix->t == VT_POLYGON))) {
// Count this feature against every zoom level.
for (ssize_t i = maxzoom; i >= 0; i--) {
ds[i].seq++;
}
// `chosen` stays at maxzoom + 1 unless some zoom level's counter is negative.
ssize_t chosen = maxzoom + 1;
// Scan from the highest zoom level downward, stopping at the first one
// whose counter is negative; the feature's minzoom is the level above that.
for (ssize_t i = maxzoom; i >= 0; i--) {
if (ds[i].seq < 0) {
feature_minzoom = i + 1;

// The feature we are pushing out
// appears in zooms i + 1 through maxzoom,
// so track where that was so we can make sure
// not to cluster something else that is *too*
// far away into it.
for (ssize_t j = i + 1; j <= maxzoom; j++) {
ds[j].previndex = ix->ix;
}

chosen = i + 1;
break;
} else {
// Zoom levels passed over on the way down have their counter reduced by their interval.
ds[i].seq -= ds[i].interval;
}
}

// If this feature has been chosen only for a high zoom level,
// check whether at a low zoom level it is nevertheless too far
// from the last feature chosen for that low zoom, in which case
// we will go ahead and push it out.

if (preserve_point_density_threshold > 0) {
for (ssize_t i = 0; i < chosen && i < maxzoom; i++) {
// The distance in index space is compared against the square of
// (2^(32 - i) / preserve_point_density_threshold).
if (ix->ix - ds[i].previndex > ((1LL << (32 - i)) / preserve_point_density_threshold) * ((1LL << (32 - i)) / preserve_point_density_threshold)) {
feature_minzoom = i;

// Record this feature as the last one placed at zoom i and every higher zoom.
for (ssize_t j = i; j <= maxzoom; j++) {
ds[j].previndex = ix->ix;
}

break;
}
}
}

// XXX manage_gap
}

return feature_minzoom;
}

static void merge(struct mergelist *merges, size_t nmerges, unsigned char *map, FILE *indexfile, int bytes, char *geom_map, FILE *geom_out, std::atomic<long long> *geompos, long long *progress, long long *progress_max, long long *progress_reported, int maxzoom, double gamma, struct drop_state *ds) {
struct mergelist *head = NULL;

Expand Down Expand Up @@ -1047,21 +985,6 @@ void radix1(int *geomfds_in, int *indexfds_in, int inputs, int prefix, int split
}
}

// Reset the drop state for every zoom level from 0 through `maxzoom`.
// Below `basezoom` the interval is exp(log(droprate) * (basezoom - zoom));
// at or above it the interval is 0.
void prep_drop_states(struct drop_state *ds, int maxzoom, int basezoom, double droprate) {
	// The zoom counter is signed because it takes part in `basezoom - zoom`.
	for (ssize_t zoom = 0; zoom <= maxzoom; zoom++) {
		struct drop_state *state = &ds[zoom];

		state->gap = 0;
		state->previndex = 0;
		state->seq = 0;
		state->interval = (zoom < basezoom) ? std::exp(std::log(droprate) * (basezoom - zoom)) : 0;
	}
}

static size_t calc_memsize() {
size_t mem;

Expand Down Expand Up @@ -1133,7 +1056,11 @@ void radix(std::vector<struct reader> &readers, int nreaders, FILE *geomfile, FI
}

struct drop_state ds[maxzoom + 1];
prep_drop_states(ds, maxzoom, basezoom, droprate);
if (maxzoom < 0 || droprate <= 0) { // not guessed with -zg yet
prep_drop_states(ds, 0, 0, 1);
} else {
prep_drop_states(ds, maxzoom, basezoom, droprate);
}

long long progress = 0, progress_max = geom_total, progress_reported = -1;
long long availfiles_before = availfiles;
Expand Down
14 changes: 0 additions & 14 deletions main.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,6 @@
#include "json_logger.hpp"
#include "serial.hpp"

// Index entry for one feature. The member order and bit-field widths are
// chosen to keep the structure at 32 bytes (see the note on `seq`).
struct index {
long long start = 0;
long long end = 0;
// compared against drop_state::previndex by calc_feature_minzoom
unsigned long long ix = 0;
short segment = 0;
// geometry type (a two-bit field; compared against VT_POINT, VT_LINE, and VT_POLYGON)
unsigned short t : 2;
unsigned long long seq : (64 - 18); // pack with segment and t to stay in 32 bytes

// The two bit-fields are zeroed in the constructor's initializer list;
// the other members use default member initializers.
index()
: t(0),
seq(0) {
}
};

struct clipbbox {
double lon1;
double lat1;
Expand Down
1 change: 1 addition & 0 deletions serial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <sys/stat.h>
#include "geometry.hpp"
#include "mbtiles.hpp"
#include "drop.hpp" // for struct index
#include "jsonpull/jsonpull.h"

size_t fwrite_check(const void *ptr, size_t size, size_t nitems, FILE *stream, std::atomic<long long> *fpos, const char *fname);
Expand Down
Loading