Skip to content

Commit

Permalink
args parsing: fix parseRx() for inclusions/exclusions to deal with non-string types (fixes #352) (#353)
Browse files Browse the repository at this point in the history

- parseRx(): treat values that are not regexes as strings and pass them to the RegExp constructor
- tests: add scope-parsing tests for the different types that can be passed in as exclusions
- update yargs to ^17.7.2
- bump version to 0.10.4
  • Loading branch information
ikreymer authored Aug 13, 2023
1 parent 16751de commit 5ba6c33
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 31 deletions.
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "browsertrix-crawler",
"version": "0.10.3",
"version": "0.10.4",
"main": "browsertrix-crawler",
"type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler",
Expand All @@ -23,7 +23,7 @@
"uuid": "8.3.2",
"warcio": "^1.6.0",
"ws": "^7.4.4",
"yargs": "^16.0.3"
"yargs": "^17.7.2"
},
"devDependencies": {
"eslint": "^7.20.0",
Expand Down
73 changes: 73 additions & 0 deletions tests/scopes.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,26 @@ exclude: https://example.com/pathexclude
});


// Verifies that a top-level `exclude` written as the quoted, digits-only value "2022"
// is returned as the regex /2022/, and that the single seed has scope type "prefix"
// with the include pattern /^https?:\/\/example\.com\//.
test("default scope + exclude is numeric", async () => {
const seeds = getSeeds(`
seeds:
- https://example.com/
exclude: "2022"
`);


// Exactly one seed is expected from the config above.
expect(seeds.length).toEqual(1);
expect(seeds[0].scopeType).toEqual("prefix");
expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\//]);
// The digits-only exclusion must come back as the regex /2022/.
expect(seeds[0].exclude).toEqual([/2022/]);

});




test("prefix scope global + exclude", async () => {
const seeds = getSeeds(`
seeds:
Expand Down Expand Up @@ -271,3 +291,56 @@ exclude:

});


// Verifies how per-seed `exclude` values written in different YAML forms (quoted and
// unquoted digits, empty, null, booleans) are converted into lists of exclusion regexes.
// NOTE(review): the YAML below is shown without indentation in this rendering; the
// per-seed nesting of `exclude` under each `url` is presumably indented in the real file.
test("with exclude non-string types", async () => {
const seeds = getSeeds(`
seeds:
- url: https://example.com/
exclude: "2023"
- url: https://example.com/
exclude: 2023
- url: https://example.com/
exclude: "0"
- url: https://example.com/
exclude: 0
- url: https://example.com/
exclude:
- url: https://example.com/
exclude: ""
- url: https://example.com/
exclude: null
- url: https://example.com/
exclude: "null"
- url: https://example.com/
exclude: false
- url: https://example.com/
exclude: true
`);

// All ten seed entries must be present, each with "prefix" scope and the same include pattern.
expect(seeds.length).toEqual(10);
for (let i = 0; i < 10; i++) {
expect(seeds[i].scopeType).toEqual("prefix");
expect(seeds[i].include).toEqual([/^https?:\/\/example\.com\//]);
}

// Expected exclusions, in the same order as the seed entries above:
//   0-1: "2023" and 2023   -> /2023/
//   2-3: "0" and 0         -> /0/ (zero must not be dropped as "no exclusion")
//   4-6: empty, "" and null -> no exclusions
//   7:   "null" (quoted)   -> /null/
//   8-9: false and true    -> /false/ and /true/
expect(seeds[0].exclude).toEqual([/2023/]);
expect(seeds[1].exclude).toEqual([/2023/]);
expect(seeds[2].exclude).toEqual([/0/]);
expect(seeds[3].exclude).toEqual([/0/]);
expect(seeds[4].exclude).toEqual([]);
expect(seeds[5].exclude).toEqual([]);
expect(seeds[6].exclude).toEqual([]);
expect(seeds[7].exclude).toEqual([/null/]);
expect(seeds[8].exclude).toEqual([/false/]);
expect(seeds[9].exclude).toEqual([/true/]);

});
6 changes: 3 additions & 3 deletions util/seeds.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ export class ScopedSeed
}

// Converts an include/exclude setting into an array of RegExp objects.
// NOTE(review): this is a rendered diff with the +/- markers stripped (3 additions,
// 3 deletions), so each changed line appears twice below — apparently the removed
// version first, followed by its replacement.
parseRx(value) {
// removed: treated every falsy value (including 0 and false) as "no patterns"
if (!value) {
// added: only null, undefined and the empty string yield an empty list
if (value === null || value === undefined || value === "") {
return [];
// removed: only a string was accepted as a single pattern
} else if (typeof(value) === "string") {
// added: any non-array value is passed to the RegExp constructor as a single pattern
} else if (!(value instanceof Array)) {
return [new RegExp(value)];
} else {
// removed: array entries that were not strings were passed through unchanged
return value.map(e => typeof(e) === "string" ? new RegExp(e) : e);
// added: existing RegExp entries are kept; every other entry goes through new RegExp()
return value.map(e => (e instanceof RegExp) ? e : new RegExp(e));
}
}

Expand Down
38 changes: 12 additions & 26 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1294,15 +1294,6 @@ cliui@^6.0.0:
strip-ansi "^6.0.0"
wrap-ansi "^6.2.0"

cliui@^7.0.2:
version "7.0.4"
resolved "https://registry.yarnpkg.com/cliui/-/cliui-7.0.4.tgz#a0265ee655476fc807aea9df3df8df7783808b4f"
integrity sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==
dependencies:
string-width "^4.2.0"
strip-ansi "^6.0.0"
wrap-ansi "^7.0.0"

cliui@^8.0.1:
version "8.0.1"
resolved "https://registry.yarnpkg.com/cliui/-/cliui-8.0.1.tgz#0c04b075db02cbfe60dc8e6cf2f5486b1a3608aa"
Expand Down Expand Up @@ -4592,11 +4583,6 @@ yargs-parser@^18.1.2:
camelcase "^5.0.0"
decamelize "^1.2.0"

yargs-parser@^20.2.2:
version "20.2.9"
resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee"
integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==

yargs-parser@^21.0.0, yargs-parser@^21.1.1:
version "21.1.1"
resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35"
Expand Down Expand Up @@ -4632,31 +4618,31 @@ yargs@^15.3.1:
y18n "^4.0.0"
yargs-parser "^18.1.2"

yargs@^16.0.3:
version "16.2.0"
resolved "https://registry.yarnpkg.com/yargs/-/yargs-16.2.0.tgz#1c82bf0f6b6a66eafce7ef30e376f49a12477f66"
integrity sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==
yargs@^17.3.1:
version "17.6.0"
resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.6.0.tgz#e134900fc1f218bc230192bdec06a0a5f973e46c"
integrity sha512-8H/wTDqlSwoSnScvV2N/JHfLWOKuh5MVla9hqLjK3nsfyy6Y4kDSYSvkU5YCUEPOSnRXfIyx3Sq+B/IWudTo4g==
dependencies:
cliui "^7.0.2"
cliui "^8.0.1"
escalade "^3.1.1"
get-caller-file "^2.0.5"
require-directory "^2.1.1"
string-width "^4.2.0"
string-width "^4.2.3"
y18n "^5.0.5"
yargs-parser "^20.2.2"
yargs-parser "^21.0.0"

yargs@^17.3.1:
version "17.6.0"
resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.6.0.tgz#e134900fc1f218bc230192bdec06a0a5f973e46c"
integrity sha512-8H/wTDqlSwoSnScvV2N/JHfLWOKuh5MVla9hqLjK3nsfyy6Y4kDSYSvkU5YCUEPOSnRXfIyx3Sq+B/IWudTo4g==
yargs@^17.7.2:
version "17.7.2"
resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269"
integrity sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==
dependencies:
cliui "^8.0.1"
escalade "^3.1.1"
get-caller-file "^2.0.5"
require-directory "^2.1.1"
string-width "^4.2.3"
y18n "^5.0.5"
yargs-parser "^21.0.0"
yargs-parser "^21.1.1"

yauzl@^2.10.0:
version "2.10.0"
Expand Down

0 comments on commit 5ba6c33

Please sign in to comment.