Skip to content

Commit

Permalink
logging: resolve confusion with 'crawl done' not being written to log…
Browse files Browse the repository at this point in the history
…, because the log is itself stored in the WACZ: (fixes #365)

- keep log file open until end, even if it's being written to WACZ
- add logging of 'crawling done' when crawling is done (writing to WACZ or not)
- add debug logging of 'end of log file' to indicate that the log file is being added to the WACZ and that nothing logged after this point will appear in the copy stored in the WACZ.
  • Loading branch information
ikreymer committed Sep 12, 2023
1 parent d42010a commit 664d0b7
Showing 1 changed file with 4 additions and 12 deletions.
16 changes: 4 additions & 12 deletions crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ export class Crawler {
}

} finally {
logger.info(`Crawl status: ${status}`);
logger.info(`Final crawl status: ${status}`);

if (this.crawlState) {
await this.crawlState.setStatus(status);
Expand Down Expand Up @@ -830,7 +830,7 @@ self.__bx_behaviors.selectMainBehavior();
}
}

await this.closeLog();
logger.info("Crawling done");

if (this.params.generateWACZ && (!this.interrupted || this.finalExit || this.uploadAndDeleteLocal)) {
const uploaded = await this.generateWACZ();
Expand All @@ -855,16 +855,6 @@ self.__bx_behaviors.selectMainBehavior();
}
}

async closeLog() {
// close file-based log
logger.setExternalLogStream(null);
try {
await new Promise(resolve => this.logFH.close(() => resolve()));
} catch (e) {
// ignore
}
}

async generateWACZ() {
logger.info("Generating WACZ");
await this.crawlState.setStatus("generate-wacz");
Expand All @@ -890,6 +880,8 @@ self.__bx_behaviors.selectMainBehavior();
logger.fatal("No WARC Files, assuming crawl failed");
}

logger.debug("End of log file, storing logs in WACZ");

// Build the argument list to pass to the wacz create command
const waczFilename = this.params.collection.concat(".wacz");
const waczPath = path.join(this.collDir, waczFilename);
Expand Down

0 comments on commit 664d0b7

Please sign in to comment.