Skip to content

Commit

Permalink
Merge pull request #1525 from pelias/negative-sources-and-layers
Browse files Browse the repository at this point in the history
Negative sources and layers
  • Loading branch information
orangejulius authored Jul 1, 2021
2 parents 18ff47b + 3b0a224 commit 07fc584
Show file tree
Hide file tree
Showing 6 changed files with 289 additions and 46 deletions.
6 changes: 6 additions & 0 deletions middleware/geocodeJSON.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ function convertToGeocodeJSON(req, res, next, opts) {
delete res.body.geocoding.query.tokens_complete;
delete res.body.geocoding.query.tokens_incomplete;

// remove arrays produced by negative sources and layers handling (only intended to be used internally).
delete res.body.geocoding.query.negative_layers;
delete res.body.geocoding.query.positive_layers;
delete res.body.geocoding.query.negative_sources;
delete res.body.geocoding.query.positive_sources;

// OPTIONAL. Warnings and errors.
addMessages(req, 'warnings', res.body.geocoding);
addMessages(req, 'errors', res.body.geocoding);
Expand Down
16 changes: 11 additions & 5 deletions sanitizer/_address_layer_filter.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ function _setup(tm) {
// error & warning messages
let messages = { errors: [], warnings: [] };

// no nothing if user has explicitely specified layers in the request
if (_.isArray(clean.layers) && !_.isEmpty(clean.layers)) {
// do nothing if user has explicitly specified positive layers in the request
if ( _.isArray(clean.positive_layers) && !_.isEmpty(clean.positive_layers) ) {
return messages;
}

// default to using the full 'clean.text'
// note: this should already have superfluous characters removed
let input = clean.text;

// no nothing if no input text specified in the request
// do nothing if no input text specified in the request
if (!nonEmptyString(input)) {
return messages;
}
Expand Down Expand Up @@ -86,9 +86,15 @@ function _setup(tm) {
// then it is safe to apply the layer filter
if (totalWords < 2 || !hasNumeral) {

// handle the common case where neither source nor layers were specified
// handle the common case where neither sources nor (positive) layers were specified
if (!_.isArray(clean.sources) || _.isEmpty(clean.sources)) {
clean.layers = tm.layers.filter(item => item !== 'address'); // exclude 'address'
// if there are no layers already set, start with the list of all of them
if (_.isEmpty(clean.layers)) {
clean.layers = tm.layers;
}

// filter the existing list of layers so it excludes 'address'
clean.layers = clean.layers.filter(item => item !== 'address');
messages.warnings.push(ADDRESS_FILTER_WARNING);
}

Expand Down
105 changes: 65 additions & 40 deletions sanitizer/_targets.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
const _ = require('lodash');

const INVALID_NEGATIVE_TARGETS_ERROR = 'contains positive and negative options in a combination ' +
'that results in an empty list. Please chose a different combination';

function getValidKeys(mapping) {
return _.uniq(Object.keys(mapping)).join(',');
}

function expandAliases(targets, targetMap) {
const expanded = targets.reduce((acc, target) => acc.concat(targetMap[target]), []);

// return deduplicated list using `Set`
return [...new Set(expanded)];
}

function _setup( paramName, targetMap ) {
const opts = {
paramName: paramName,
Expand All @@ -15,52 +25,68 @@ function _setup( paramName, targetMap ) {
// error & warning messages
var messages = { errors: [], warnings: [] };

// the string of targets (comma delimeted)
// the string of targets (comma delimited)
var targetsString = raw[opts.paramName];

// trim whitespace
if( _.isString( targetsString ) && !_.isEmpty( targetsString ) ){
targetsString = targetsString.trim();

// param must be a valid non-empty string
if( !_.isString( targetsString ) || _.isEmpty( targetsString ) ){
messages.errors.push(
opts.paramName + ' parameter cannot be an empty string. Valid options: ' + getValidKeys(opts.targetMap)
);
}
else {

// split string in to array and lowercase each target string
var targets = targetsString.split(',').map( function( target ){
return target.toLowerCase(); // lowercase inputs
});

// emit an error for each target *not* present in the targetMap
targets.filter( function( target ){
return !opts.targetMap.hasOwnProperty(target);
}).forEach( function( target ){
messages.errors.push(
'\'' + target + '\' is an invalid ' + opts.paramName + ' parameter. Valid options: ' + getValidKeys(opts.targetMap)
);
});

// only set types value when no error occured
if( !messages.errors.length ){
clean[opts.paramName] = targets.reduce(function(acc, target) {
return acc.concat(opts.targetMap[target]);
}, []);

// dedupe in case aliases expanded to common things or user typed in duplicates
clean[opts.paramName] = _.uniq(clean[opts.paramName]);
}
}
// input is not a string, parameter is not defined, can quit early
if (!_.isString(targetsString)) {
return messages;
}

// string is empty
else if( _.isString( targetsString ) ){
// remove all whitespace characters
targetsString = targetsString.replace(/\s/g,'');

// return error if parameter ends up being all whitespace
if( _.isEmpty( targetsString ) ){
messages.errors.push(
opts.paramName + ' parameter cannot be an empty string. Valid options: ' + getValidKeys(opts.targetMap)
);
return messages;
}

// split string in to array and lowercase each target string
var targets = targetsString.split(',').map( function( target ){
return target.toLowerCase(); // lowercase inputs
});

const positive_targets = targets.filter((t) => t[0] !== '-' );

const negative_targets = targets.filter((t) => t[0] === '-' )
.map((t) => t.slice(1)); // remove the leading '-' from the negative target so it can be validated easily

// emit an error for each target *not* present in the targetMap
positive_targets.concat(negative_targets).filter( target => !opts.targetMap.hasOwnProperty(target) ).forEach(( target ) =>{
messages.errors.push(
`\'${target}\' is an invalid ${opts.paramName} parameter. Valid options: ${getValidKeys(opts.targetMap)}`
);
});

// for calculating the final list of targets use either:
// - the list of positive targets, if there are any
// - otherwise, the list of all possible targets
const starting_positive_targets = positive_targets.length ?
positive_targets :
Object.keys(opts.targetMap);

// calculate the "effective" list of positive and negative targets, by expanding aliases
const effective_positive_targets = expandAliases(starting_positive_targets, opts.targetMap);
const effective_negative_targets = expandAliases(negative_targets, opts.targetMap);

// the final list of targets is the positive list, with the negative list excluded
const final_targets = effective_positive_targets.filter((t) => !effective_negative_targets.includes(t));

if (final_targets.length === 0) {
messages.errors.push(
`${opts.paramName} ${INVALID_NEGATIVE_TARGETS_ERROR}`
);
}

clean[`positive_${opts.paramName}`] = effective_positive_targets;
clean[`negative_${opts.paramName}`] = effective_negative_targets;

// only set final value when no error occurred
if( !messages.errors.length ){
clean[opts.paramName] = final_targets;
}

return messages;
Expand All @@ -69,5 +95,4 @@ function _setup( paramName, targetMap ) {
}; // end of object to be returned
} // end of _setup function


module.exports = _setup;
48 changes: 47 additions & 1 deletion test/unit/sanitizer/_address_layer_filter.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@ const STD_MESSAGES = { errors: [], warnings: ['performance optimization: excludi
module.exports.tests = {};

module.exports.tests.sanitize = function (test, common) {
// a simplified type mapping with dummy values
let tm = new TypeMapping();
tm.setLayersBySource({ A: ['A'], B: ['B'], C: ['C'] });
tm.generateMappings();
let s = sanitizer(tm);

// a real type mapping with our common sources and layers
const real_type_mapping = new TypeMapping();
real_type_mapping.load();
const real_sanitizer = sanitizer(real_type_mapping);


test('sanitize - do nothing if clean.layers already specified', (t) => {
let clean = { text: '1 example', layers: ['not empty'] };
let clean = { text: '1 example', layers: ['not empty'], positive_layers: ['not empty'] };
t.deepEqual(s.sanitize(null, clean), NO_MESSAGES);
t.deepEqual(clean.layers, ['not empty']);
t.end();
Expand Down Expand Up @@ -83,6 +90,45 @@ module.exports.tests.sanitize = function (test, common) {
t.false(clean.layers);
t.end();
});

test('sanitize - do nothing when address layer explicitly specified', (t) => {
let clean = { text: 'foo', layers: ['address', 'venue'], positive_layers: ['address', 'venue'] };

t.deepEqual(real_sanitizer.sanitize(null, clean), NO_MESSAGES);
t.deepEqual(clean.layers, ['address', 'venue']);
t.end();
});

test('sanitize - do nothing when source with only addresses (OA) specified', (t) => {
let clean = { text: 'foo', sources: ['openaddresses']};

t.deepEqual(real_sanitizer.sanitize(null, clean), NO_MESSAGES);
t.equal(clean.layers, undefined);
t.deepEqual(clean.sources, ['openaddresses']);
t.end();
});

test('sanitize - exclude layers when source with addresses and other layers (OSM) specified', (t) => {
let clean = { text: 'foo', sources: ['openstreetmap']};

t.deepEqual(real_sanitizer.sanitize(null, clean), STD_MESSAGES);
t.deepEqual(clean.layers, ['venue', 'street'], 'layer list is reduced to exclude addresses');
t.deepEqual(clean.sources, ['openstreetmap']);
t.end();
});

test('sanitize - exclude addresses when negative layers other than address are specified', (t) => {
// select all layers except venue to simulate value of clean.layers from targets sanitizer
const clean_layers = real_type_mapping.getCanonicalLayers().filter(layer => layer !== 'venue').sort();

let clean = { text: 'foo', layers: clean_layers, negative_layers: ['-venue'], positive_layers: []};

const expected_layers = clean_layers.filter(layer => layer !== 'address').sort();

t.deepEqual(real_sanitizer.sanitize(null, clean), STD_MESSAGES);
t.deepEqual(clean.layers.sort(), expected_layers, 'layer list is reduced to exclude addresses');
t.end();
});
};

module.exports.tests.tricky_inputs = function (test, common) {
Expand Down
111 changes: 111 additions & 0 deletions test/unit/sanitizer/_layers.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@ module.exports.tests.sanitize_layers = function(test, common) {
t.end();
});

test('invalid negative layer', function(t) {
const raw = { layers: '-test_layer' };
const clean = {};

var messages = sanitizer.sanitize(raw, clean);

const msg = ' is an invalid layers parameter. Valid options: ';
t.equal(messages.errors.length, 1, 'errors set');
t.true(messages.errors[0].match(msg), 'invalid layer message emitted');
t.true(messages.errors[0].match('test_layer'), 'invalid layer message contains layer');
t.end();
});

test('venue (alias) layer', function(t) {
var raw = { layers: 'venue' };
var clean = {};
Expand Down Expand Up @@ -124,6 +137,104 @@ module.exports.tests.sanitize_layers = function(test, common) {
t.deepEqual(clean.layers, expected_layers, 'all layers found (no duplicates)');
t.end();
});

test('positive and negative layers', function(t) {
var raw = { layers: 'venue,address,-venue' };
var clean = {};

sanitizer.sanitize(raw, clean);

var expected_layers = ['address'];
t.deepEqual(clean.layers, expected_layers, 'positive layers plus negative layer returns only selected positive layers');
t.end();
});

test('only negative layers', function(t) {
var raw = { layers: '-venue' };
var clean = {};

sanitizer.sanitize(raw, clean);

const expected_layers = type_mapping.getCanonicalLayers().filter(layer => layer !== 'venue').sort();

t.deepEqual(clean.layers.sort(), expected_layers, 'all layers except negative layer selected');
t.end();
});

test('only negative layers, duplicated', function(t) {
var raw = { layers: '-venue,-venue' };
var clean = {};

sanitizer.sanitize(raw, clean);

const expected_layers = type_mapping.getCanonicalLayers().filter(layer => layer !== 'venue').sort();

t.deepEqual(clean.layers.sort(), expected_layers, 'all layers except negative layer selected');
t.end();
});

test('only negative layers, with extra space character', function(t) {
var raw = { layers: '- venue' };
var clean = {};

sanitizer.sanitize(raw, clean);

const expected_layers = type_mapping.getCanonicalLayers().filter(layer => layer !== 'venue').sort();

t.deepEqual(clean.layers.sort(), expected_layers, 'all layers except negative layer selected');
t.end();
});

test('only negative layers, with extra unicode whitespace character', function(t) {
var raw = { layers: '-\uFEFFvenue' };
var clean = {};

sanitizer.sanitize(raw, clean);

const expected_layers = type_mapping.getCanonicalLayers().filter(layer => layer !== 'venue').sort();

t.deepEqual(clean.layers.sort(), expected_layers, 'all layers except negative layer selected');
t.end();
});

test('positive alias and negative layers', function(t) {
var raw = { layers: 'coarse,-locality' };
var clean = {};

sanitizer.sanitize(raw, clean);

// final list of layers should be all coarse layers, except locality
const expected_layers = type_mapping.layer_mapping.coarse.filter(layer => layer !== 'locality');

t.deepEqual(clean.layers, expected_layers, 'positive alias plus negative layer returns subset of alias');
t.deepEqual(clean.negative_layers, ['locality'], 'negative_layers value is set');
t.end();
});

test('negative alias and positive layer in that alias', function(t) {
var raw = { layers: '-coarse,locality' };
var clean = {};

const messages = sanitizer.sanitize(raw, clean);

// final list of layers should be all coarse layers, except locality
t.deepEqual(clean.layers, undefined , 'returns undefined, as negative layers are applied after');
t.equal(messages.errors.length, 1, 'error emitted: invalid combination');
t.end();
});

test('positive alias and negative layers, reverse order', function(t) {
var raw = { layers: '-locality,coarse' };
var clean = {};

sanitizer.sanitize(raw, clean);

// final list of layers should be all coarse layers, except locality
const expected_layers = type_mapping.layer_mapping.coarse.filter((layer) => layer !== 'locality');

t.deepEqual(clean.layers, expected_layers, 'positive alias plus negative layer returns subset of alias');
t.end();
});
};

module.exports.all = function (tape, common) {
Expand Down
Loading

0 comments on commit 07fc584

Please sign in to comment.