Skip to content

Commit

Permalink
feat(parser): remove URL regex caret anchor
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Dec 9, 2024
1 parent 4c99ce3 commit 7c1fc5a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
8 changes: 3 additions & 5 deletions stream/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,9 @@ function selectName( names ){
// filter out URLs
// then return the longest name
// @todo: can we improve this logic?
return names.filter( function ( name) {
return !name.match(/^http(s)?:\/\//);
}).reduce( function( a, b ){
return a.length > b.length ? a : b;
}, '');
return names
.filter(name => !/http(s)?:\/\//.test(name))
.reduce((a, b) => a.length > b.length ? a : b, '');
}

module.exports = parser;
21 changes: 21 additions & 0 deletions test/stream/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,27 @@ module.exports.tests.filter_url = function(test, common) {
stream.write(row);
stream.end();
});

// real-world example where the URL was included with a valid name
// (i.e. was preceded by a space rather than a NULL character).
test('parse: filter URL within name', (t) => {
const stream = parser(6);
// fields are joined with NUL characters; the third field embeds a URL
// after a space and is longer than the second field, so it must be
// filtered out for the second field to be selected as the name.
const row = [
'i{s~{AqubwJ{TxV{BlDmBnCiGhJgCbCs@dAaCfHmAnCoBpB',
'Sentier des Chasupes',
'Mairie Bouxières http://www.mairie-bouxieres-aux-dames.fr/wp-content/uploads/2005/01/Les-sentiers-de-Bouxi%C3%A8res-aux-Dames.pdf',
].join('\0');
const expected = 'Sentier des Chasupes';

// compare the name on the emitted object against the expected value,
// then signal that the next object can be processed.
const assert = ( actual, enc, next ) => {
t.deepEqual( actual.properties.name, expected, 'longest non-URL name selected' );
next();
};

// the second callback passed to through.obj ends the test
// (presumably invoked once the stream has flushed; confirm against through2).
stream.pipe( through.obj( assert, () => t.end() ) );
stream.write(row);
stream.end();
});
};

module.exports.tests.filter_only_url = function(test, common) {
Expand Down

0 comments on commit 7c1fc5a

Please sign in to comment.