From edbd1b644b490ca9208fbbf4d7cbdc69664762af Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 13 Aug 2023 14:17:20 -0700 Subject: [PATCH] args parsing: fix parseRx() for inclusions/exclusions to deal with non-string types (fixes #352) treat non-regexes as strings and pass to RegExp constructor tests: add additional scope parsing tests for different types passed in as exclusions update yargs bump to 0.10.4 --- package.json | 4 +-- tests/scopes.test.js | 73 ++++++++++++++++++++++++++++++++++++++++++++ util/seeds.js | 6 ++-- yarn.lock | 38 ++++++++--------------- 4 files changed, 90 insertions(+), 31 deletions(-) diff --git a/package.json b/package.json index 79e837acd..5c8316ea3 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "browsertrix-crawler", - "version": "0.10.3", + "version": "0.10.4", "main": "browsertrix-crawler", "type": "module", "repository": "https://github.com/webrecorder/browsertrix-crawler", @@ -23,7 +23,7 @@ "uuid": "8.3.2", "warcio": "^1.6.0", "ws": "^7.4.4", - "yargs": "^16.0.3" + "yargs": "^17.7.2" }, "devDependencies": { "eslint": "^7.20.0", diff --git a/tests/scopes.test.js b/tests/scopes.test.js index 6a2d0be77..0e6f0422b 100644 --- a/tests/scopes.test.js +++ b/tests/scopes.test.js @@ -49,6 +49,26 @@ exclude: https://example.com/pathexclude }); +test("default scope + exclude is numeric", async () => { + const seeds = getSeeds(` +seeds: + - https://example.com/ + +exclude: "2022" + +`); + + + expect(seeds.length).toEqual(1); + expect(seeds[0].scopeType).toEqual("prefix"); + expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\//]); + expect(seeds[0].exclude).toEqual([/2022/]); + +}); + + + + test("prefix scope global + exclude", async () => { const seeds = getSeeds(` seeds: @@ -271,3 +291,56 @@ exclude: }); + +test("with exclude non-string types", async () => { + const seeds = getSeeds(` +seeds: + - url: https://example.com/ + exclude: "2023" + + - url: https://example.com/ + exclude: 2023 + + - url: https://example.com/ + exclude: "0" + + - url: https://example.com/ + exclude: 0 + + - url: https://example.com/ + exclude: + + - url: https://example.com/ + exclude: "" + + - url: https://example.com/ + exclude: null + + - url: https://example.com/ + exclude: "null" + + - url: https://example.com/ + exclude: false + + - url: https://example.com/ + exclude: true +`); + + expect(seeds.length).toEqual(10); + for (let i = 0; i < 10; i++) { + expect(seeds[i].scopeType).toEqual("prefix"); + expect(seeds[i].include).toEqual([/^https?:\/\/example\.com\//]); + } + + expect(seeds[0].exclude).toEqual([/2023/]); + expect(seeds[1].exclude).toEqual([/2023/]); + expect(seeds[2].exclude).toEqual([/0/]); + expect(seeds[3].exclude).toEqual([/0/]); + expect(seeds[4].exclude).toEqual([]); + expect(seeds[5].exclude).toEqual([]); + expect(seeds[6].exclude).toEqual([]); + expect(seeds[7].exclude).toEqual([/null/]); + expect(seeds[8].exclude).toEqual([/false/]); + expect(seeds[9].exclude).toEqual([/true/]); + +}); diff --git a/util/seeds.js b/util/seeds.js index c45f51ff3..081b9c3e8 100644 --- a/util/seeds.js +++ b/util/seeds.js @@ -30,12 +30,12 @@ export class ScopedSeed } parseRx(value) { - if (!value) { + if (value === null || value === undefined || value === "") { return []; - } else if (typeof(value) === "string") { + } else if (!(value instanceof Array)) { return [new RegExp(value)]; } else { - return value.map(e => typeof(e) === "string" ? new RegExp(e) : e); + return value.map(e => (e instanceof RegExp) ? e : new RegExp(e)); } } diff --git a/yarn.lock b/yarn.lock index 6423dba64..5b889f341 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1294,15 +1294,6 @@ cliui@^6.0.0: strip-ansi "^6.0.0" wrap-ansi "^6.2.0" -cliui@^7.0.2: - version "7.0.4" - resolved "https://registry.yarnpkg.com/cliui/-/cliui-7.0.4.tgz#a0265ee655476fc807aea9df3df8df7783808b4f" - integrity sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ== - dependencies: - string-width "^4.2.0" - strip-ansi "^6.0.0" - wrap-ansi "^7.0.0" - cliui@^8.0.1: version "8.0.1" resolved "https://registry.yarnpkg.com/cliui/-/cliui-8.0.1.tgz#0c04b075db02cbfe60dc8e6cf2f5486b1a3608aa" @@ -4592,11 +4583,6 @@ yargs-parser@^18.1.2: camelcase "^5.0.0" decamelize "^1.2.0" -yargs-parser@^20.2.2: - version "20.2.9" - resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee" - integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w== - yargs-parser@^21.0.0, yargs-parser@^21.1.1: version "21.1.1" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35" @@ -4632,23 +4618,23 @@ yargs@^15.3.1: y18n "^4.0.0" yargs-parser "^18.1.2" -yargs@^16.0.3: - version "16.2.0" - resolved "https://registry.yarnpkg.com/yargs/-/yargs-16.2.0.tgz#1c82bf0f6b6a66eafce7ef30e376f49a12477f66" - integrity sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw== +yargs@^17.3.1: + version "17.6.0" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.6.0.tgz#e134900fc1f218bc230192bdec06a0a5f973e46c" + integrity sha512-8H/wTDqlSwoSnScvV2N/JHfLWOKuh5MVla9hqLjK3nsfyy6Y4kDSYSvkU5YCUEPOSnRXfIyx3Sq+B/IWudTo4g== dependencies: - cliui "^7.0.2" + cliui "^8.0.1" escalade "^3.1.1" get-caller-file "^2.0.5" require-directory "^2.1.1" - string-width "^4.2.0" + string-width "^4.2.3" y18n "^5.0.5" - yargs-parser "^20.2.2" + yargs-parser "^21.0.0" -yargs@^17.3.1: - version "17.6.0" - resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.6.0.tgz#e134900fc1f218bc230192bdec06a0a5f973e46c" - integrity sha512-8H/wTDqlSwoSnScvV2N/JHfLWOKuh5MVla9hqLjK3nsfyy6Y4kDSYSvkU5YCUEPOSnRXfIyx3Sq+B/IWudTo4g== +yargs@^17.7.2: + version "17.7.2" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269" + integrity sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w== dependencies: cliui "^8.0.1" escalade "^3.1.1" @@ -4656,7 +4642,7 @@ yargs@^17.3.1: require-directory "^2.1.1" string-width "^4.2.3" y18n "^5.0.5" - yargs-parser "^21.0.0" + yargs-parser "^21.1.1" yauzl@^2.10.0: version "2.10.0"