Skip to content

Commit

Permalink
seed parsing: return null if invalid url encountered in parseUrl to avoid subsequent exception! (#349)
Browse files Browse the repository at this point in the history

adjust error labels to differentiate invalid pages vs seeds
fixes webrecorder/browsertrix#1037
  • Loading branch information
ikreymer authored Aug 8, 2023
1 parent 69fc181 commit 6270571
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions util/seeds.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export class ScopedSeed
constructor({url, scopeType, include, exclude = [], allowHash = false, depth = -1, sitemap = false, extraHops = 0} = {}) {
const parsedUrl = this.parseUrl(url);
if (!parsedUrl) {
logger.fatal(`Invalid Seed "${url}" - not a valid URL`);
logger.fatal(`Invalid Seed "${url}" specified, aborting crawl.`);
}
this.url = parsedUrl.href;
this.include = this.parseRx(include);
Expand Down Expand Up @@ -44,11 +44,12 @@ export class ScopedSeed
try {
parsedUrl = new URL(url.trim());
} catch (e) {
logger.warn("Invalid Seed - not a valid URL", {url, ...logDetails});
logger.warn("Invalid Page - not a valid URL", {url, ...logDetails});
return null;
}

if (parsedUrl.protocol !== "http:" && parsedUrl.protocol != "https:") {
logger.warn("Invalid Seed - URL must start with http:// or https://", {url, ...logDetails});
logger.warn("Invalid Page - URL must start with http:// or https://", {url, ...logDetails});
parsedUrl = null;
}

Expand Down

0 comments on commit 6270571

Please sign in to comment.