From ce57c15add912a8247fbaabfafb066741c9127ca Mon Sep 17 00:00:00 2001
From: Peter Johnson
Date: Mon, 9 Dec 2024 14:40:17 +0100
Subject: [PATCH] feat(parser): remove URLs from names (ie. try to save them)

---
 stream/parser.js      |  9 ++++-----
 test/stream/parser.js | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/stream/parser.js b/stream/parser.js
index fbda09f..c7f7399 100644
--- a/stream/parser.js
+++ b/stream/parser.js
@@ -53,12 +53,11 @@ function parser( precision ){
 // each connected road can have one or more names
 // we select one name to be the default.
 function selectName( names ){
-  // filter out URLs
-  // then return the longest name
-  // @todo: can we improve this logic?
+  // remove URLs from each name, then return the longest remaining name
+  // (the first of equally long names wins; '' when there are no usable names)
   return names
-    .filter(name => !/http(s)?:\/\//.test(name))
-    .reduce((a, b) => a.length > b.length ? a : b, '');
+    .map(name => name.replace(/(?:https?|ftp):\/\/\S*/g, '').trim())
+    .reduce((a, b) => b.length > a.length ? b : a, '');
 }
 
 module.exports = parser;
diff --git a/test/stream/parser.js b/test/stream/parser.js
index 1345409..a707d0a 100644
--- a/test/stream/parser.js
+++ b/test/stream/parser.js
@@ -132,6 +132,26 @@ module.exports.tests.filter_url = function(test, common) {
     stream.write(row);
     stream.end();
   });
+
+  test('parse: URL removal', (t) => {
+    const stream = parser(6);
+    const row = [
+      'i{s~{AqubwJ{TxV{BlDmBnCiGhJgCbCs@dAaCfHmAnCoBpB',
+      'http://foo.com/bar.pdf',
+      'Short Example https://foo.com/bar.pdf',
+      'Longer Example ftp://foo.com/bar.pdf',
+    ].join('\0');
+    const expected = 'Longer Example';
+
+    const assert = ( actual, enc, next ) => {
+      t.deepEqual( actual.properties.name, expected, 'longest non-URL name selected' );
+      next();
+    };
+
+    stream.pipe( through.obj( assert, () => t.end() ) );
+    stream.write(row);
+    stream.end();
+  });
 };
 
 module.exports.tests.filter_only_url = function(test, common) {