From 102791f9556c4241b5fe4f92d526d6535520ff51 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Tue, 5 Sep 2023 20:18:48 -0400
Subject: [PATCH] misc exit features:
 - if interrupted (via signal or due to limits) and not finished, return
   error code 11 to indicate interruption
 - allow stopping single instances with hset '<key>:stopone' <uid> 1
   (similar to status)
 - deliberate stop via redis not considered interruption (exit 0)

---
 crawler.js    | 10 +++++++---
 util/state.js | 10 +++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/crawler.js b/crawler.js
index 0b9d8af84..03c0fa451 100644
--- a/crawler.js
+++ b/crawler.js
@@ -163,7 +163,7 @@ export class Crawler {
       } catch (e) {
         //logger.fatal("Unable to connect to state store Redis: " + redisUrl);
         logger.warn(`Waiting for redis at ${redisUrl}`, {}, "state");
-        await sleep(3);
+        await sleep(1);
       }
     }
 
@@ -304,12 +304,16 @@ export class Crawler {
   async run() {
     await this.bootstrap();
 
-    let status;
+    let status = "done";
     let exitCode = 0;
 
     try {
       await this.crawl();
-      status = (!this.interrupted ? "done" : "interrupted");
+      const finished = await this.crawlState.isFinished();
+      if (this.interrupted && !finished) {
+        status = "interrupted";
+        exitCode = 11;
+      }
     } catch(e) {
       logger.error("Crawl failed", e);
       exitCode = 9;
diff --git a/util/state.js b/util/state.js
index 5c33871ca..0cfe45471 100644
--- a/util/state.js
+++ b/util/state.js
@@ -207,7 +207,15 @@ return 0;
   }
 
   async isCrawlStopped() {
-    return await this.redis.get(`${this.key}:stopping`) === "1";
+    if (await this.redis.get(`${this.key}:stopping`) === "1") {
+      return true;
+    }
+
+    if (await this.redis.hget(`${this.key}:stopone`, this.uid) === "1") {
+      return true;
+    }
+
+    return false;
   }
 
   // note: not currently called in crawler, but could be