Skip to content

Commit

Permalink
add timeouts to:
Browse files Browse the repository at this point in the history
- await page.close() if not finished within 10s (TEARDOWN_TIMEOUT)
- await crawler.pageFinished() if not finished within 60s (in case config is being written)
  • Loading branch information
ikreymer committed Sep 26, 2023
1 parent 165a978 commit f2951ee
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions util/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const MAX_REUSE = 5;

// Time limits for worker operations. NOTE(review): the values appear to be
// in seconds, based on how they are handed to timedRun() — confirm against
// timedRun's signature.

// Presumably the limit for opening a new window/page; its usage is not
// visible in this view — TODO confirm.
const NEW_WINDOW_TIMEOUT = 20;
// Limit passed to timedRun() around page.close() when a page is closed.
const TEARDOWN_TIMEOUT = 10;
// Limit passed to timedRun() around crawler.pageFinished().
const FINISHED_TIMEOUT = 60;

// ===========================================================================
export function runWorkers(crawler, numWorkers, maxPageTime) {
Expand Down Expand Up @@ -81,7 +82,13 @@ export class PageWorker

try {
logger.debug("Closing page", {crashed: this.crashed, workerid: this.id}, "worker");
await this.page.close();
await timedRun(
this.page.close(),
TEARDOWN_TIMEOUT,
"Page Close Timed Out",
this.logDetails,
"worker"
);
} catch (e) {
// ignore
} finally {
Expand Down Expand Up @@ -203,7 +210,13 @@ export class PageWorker
logger.error("Worker Exception", {...errJSON(e), ...this.logDetails}, "worker");
}
} finally {
await this.crawler.pageFinished(data);
await timedRun(
this.crawler.pageFinished(data),
FINISHED_TIMEOUT,
"Page Finished Timed Out",
this.logDetails,
"worker"
);
}
}

Expand Down

0 comments on commit f2951ee

Please sign in to comment.