From 44de6cf7429e98122819a3338a3d6d6d7e588a19 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 13 Aug 2024 15:55:30 -0400 Subject: [PATCH 1/7] Add logDirectory option to copy logs to WACZ --- bin/cli.js | 4 ++++ constants.js | 12 ++++++++++++ fixtures/logs/log.txt | 20 ++++++++++++++++++++ index.js | 38 ++++++++++++++++++++++++++++++++++++++ index.test.js | 15 +++++++++++++-- 5 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 fixtures/logs/log.txt diff --git a/bin/cli.js b/bin/cli.js index 1b3caaf..3d418f6 100755 --- a/bin/cli.js +++ b/bin/cli.js @@ -61,6 +61,9 @@ program.command('create') 'If not provided, js-wacz will reindex from WARCS. Must be used in combination ' + 'with --pages, since using this option will skip the step required to generate a ' + 'pages.jsonl file.') + .option( + '-l --log-directory ', + 'Path to a directory of log files to copy into WACZ.') .action(async (name, options, command) => { /** @type {Object} */ const values = options._optionValues @@ -113,6 +116,7 @@ program.command('create') signingToken: values?.signingToken, pages: values?.pages, cdxj: values?.cdxj, + logDirectory: values?.logDirectory, log }) } catch (err) { diff --git a/constants.js b/constants.js index ddd4fff..b043b77 100644 --- a/constants.js +++ b/constants.js @@ -40,6 +40,18 @@ export const EXTRA_PAGES_FIXTURE_PATH = `${PAGES_DIR_FIXTURES_PATH}extraPages.js */ export const CDXJ_DIR_FIXTURES_PATH = `${FIXTURES_PATH}cdxj${sep}` +/** + * Path to the fixtures folder log directory sub-directory. + * @constant + */ +export const LOG_DIR_FIXTURES_PATH = `${FIXTURES_PATH}logs${sep}` + +/** + * Path to the log.txt fixture + * @constant + */ +export const LOG_FILE_FIXTURE_PATH = `${LOG_DIR_FIXTURES_PATH}log.txt` + /** * Colors scheme for log level. 
* @constant diff --git a/fixtures/logs/log.txt b/fixtures/logs/log.txt new file mode 100644 index 0000000..2717cc6 --- /dev/null +++ b/fixtures/logs/log.txt @@ -0,0 +1,20 @@ +{"timestamp":"2024-08-13T19:53:20.782Z","logLevel":"info","context":"general","message":"Browsertrix-Crawler 1.2.6 (with warcio.js 2.2.1)","details":{}} +{"timestamp":"2024-08-13T19:53:20.784Z","logLevel":"info","context":"general","message":"Seeds","details":[{"url":"https://webrecorder.net/","scopeType":"prefix","include":["/^https?:\\/\\/webrecorder\\.net\\//"],"exclude":[],"allowHash":false,"depth":-1,"sitemap":null,"auth":null,"_authEncoded":null,"maxExtraHops":0,"maxDepth":1000000}]} +{"timestamp":"2024-08-13T19:53:20.785Z","logLevel":"info","context":"general","message":"Behavior Options","details":{"message":"{\"autoplay\":true,\"autofetch\":true,\"autoscroll\":true,\"siteSpecific\":true,\"log\":\"__bx_log\",\"startEarly\":true}"}} +{"timestamp":"2024-08-13T19:53:22.417Z","logLevel":"info","context":"worker","message":"Creating 1 workers","details":{}} +{"timestamp":"2024-08-13T19:53:22.419Z","logLevel":"info","context":"worker","message":"Worker starting","details":{"workerid":0}} +{"timestamp":"2024-08-13T19:53:22.654Z","logLevel":"info","context":"worker","message":"Starting page","details":{"workerid":0,"page":"https://webrecorder.net/"}} +{"timestamp":"2024-08-13T19:53:22.655Z","logLevel":"info","context":"crawlStatus","message":"Crawl statistics","details":{"crawled":0,"total":1,"pending":1,"failed":0,"limit":{"max":1,"hit":false},"pendingPages":["{\"seedId\":0,\"started\":\"2024-08-13T19:53:22.423Z\",\"extraHops\":0,\"url\":\"https:\\/\\/webrecorder.net\\/\",\"added\":\"2024-08-13T19:53:21.426Z\",\"depth\":0}"]}} +{"timestamp":"2024-08-13T19:53:22.928Z","logLevel":"info","context":"general","message":"Awaiting page load","details":{"page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:25.176Z","logLevel":"warn","context":"general","message":"Invalid 
Page - URL must start with http:// or https://","details":{"url":"mailto:info@webrecorder.net","page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:25.190Z","logLevel":"info","context":"behavior","message":"Running behaviors","details":{"frames":1,"frameUrls":["https://webrecorder.net/"],"page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:25.191Z","logLevel":"info","context":"behavior","message":"Run Script Started","details":{"frameUrl":"https://webrecorder.net/","page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:25.200Z","logLevel":"info","context":"behaviorScript","message":"Behavior log","details":{"state":{"segments":1},"msg":"Skipping autoscroll, page seems to not be responsive to scrolling events","page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:25.200Z","logLevel":"info","context":"behaviorScript","message":"Behavior log","details":{"state":{"segments":1},"msg":"done!","page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:25.702Z","logLevel":"info","context":"behavior","message":"Run Script Finished","details":{"frameUrl":"https://webrecorder.net/","page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:25.703Z","logLevel":"info","context":"behavior","message":"Behaviors finished","details":{"finished":1,"page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:26.716Z","logLevel":"info","context":"pageStatus","message":"Page Finished","details":{"loadState":4,"page":"https://webrecorder.net/","workerid":0}} +{"timestamp":"2024-08-13T19:53:26.734Z","logLevel":"info","context":"worker","message":"Worker done, all tasks complete","details":{"workerid":0}} +{"timestamp":"2024-08-13T19:53:26.829Z","logLevel":"info","context":"crawlStatus","message":"Crawl statistics","details":{"crawled":1,"total":1,"pending":0,"failed":0,"limit":{"max":1,"hit":true},"pendingPages":[]}} 
+{"timestamp":"2024-08-13T19:53:26.830Z","logLevel":"info","context":"general","message":"Crawling done","details":{}} +{"timestamp":"2024-08-13T19:53:26.831Z","logLevel":"info","context":"general","message":"Exiting, Crawl status: done","details":{}} diff --git a/index.js b/index.js index fbf80fe..2bd86b3 100644 --- a/index.js +++ b/index.js @@ -191,6 +191,12 @@ export class WACZ { */ cdxjDir = null + /** + * Path to directory of log files to copy into WACZ. + * @type {?string} + */ + logDirectory = null + /** * @param {WACZOptions} options - See {@link WACZOptions} for details. */ @@ -340,6 +346,10 @@ export class WACZ { } } + if (options?.logDirectory) { + this.logDirectory = String(options?.logDirectory).trim() + } + if (options?.signingToken && this.signingUrl) { this.signingToken = String(options.signingToken) } @@ -398,6 +408,11 @@ export class WACZ { info('Writing WARCs to WACZ') await this.writeWARCsToZip() + if (this.logDirectory) { + info('Writing logs to WACZ') + await this.writeLogsToZip() + } + info('Writing datapackage.json to WACZ') await this.writeDatapackageToZip() @@ -727,6 +742,29 @@ export class WACZ { } } + /** + * Streams all the files listed in `this.logDirectory` to the output ZIP. + * @returns {Promise} + */ + writeLogsToZip = async () => { + this.stateCheck() + + const { logDirectory, addFileToZip, log } = this + + const logFiles = await fs.readdir(logDirectory) + + for (const logFile of logFiles) { + const logFilepath = resolve(this.logDirectory, logFile) + + try { + await addFileToZip(logFilepath, `logs/${logFile}`) + } catch (err) { + log.trace(err) + throw new Error(`An error occurred while writing "${logFile}" to ZIP.`) + } + } + } + /** * Creates `datapackage.json` out of `this.resources` and writes it to ZIP. 
* @returns {Promise} diff --git a/index.test.js b/index.test.js index 42a5bc3..98aae7e 100644 --- a/index.test.js +++ b/index.test.js @@ -11,7 +11,7 @@ import StreamZip from 'node-stream-zip' import * as dotenv from 'dotenv' import { WACZ } from './index.js' -import { FIXTURES_PATH, PAGES_DIR_FIXTURES_PATH, PAGES_FIXTURE_PATH, EXTRA_PAGES_FIXTURE_PATH, CDXJ_DIR_FIXTURES_PATH } from './constants.js' +import { FIXTURES_PATH, PAGES_DIR_FIXTURES_PATH, PAGES_FIXTURE_PATH, EXTRA_PAGES_FIXTURE_PATH, LOG_DIR_FIXTURES_PATH, LOG_FILE_FIXTURE_PATH, CDXJ_DIR_FIXTURES_PATH } from './constants.js' import { assertSHA256WithPrefix, assertValidWACZSignatureFormat } from './utils/assertions.js' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import // Loads env vars from .env if provided @@ -187,6 +187,11 @@ test('WACZ constructor accounts for options.datapackageExtras if provided.', asy assert.equal(archive.datapackageExtras, datapackageExtras) }) +test('WACZ constructor accounts for options.logDirectory if valid.', async (_t) => { + const archive = new WACZ({ input: FIXTURE_INPUT, logDirectory: LOG_DIR_FIXTURES_PATH }) + assert.equal(archive.logDirectory, LOG_DIR_FIXTURES_PATH) +}) + test('addPage adds entry to pagesTree and turns detectPages off.', async (_t) => { const archive = new WACZ({ input: FIXTURE_INPUT }) assert.equal(archive.detectPages, true) @@ -347,7 +352,8 @@ test('WACZ.process with pagesDir option creates valid WACZ with provided pages f url: 'https://lil.law.harvard.edu', title: 'WACZ Title', description: 'WACZ Description', - pages: PAGES_DIR_FIXTURES_PATH + pages: PAGES_DIR_FIXTURES_PATH, + logDirectory: LOG_DIR_FIXTURES_PATH } const archive = new WACZ(options) @@ -373,6 +379,11 @@ test('WACZ.process with pagesDir option creates valid WACZ with provided pages f const extraPagesFixtureHash = await archive.sha256(EXTRA_PAGES_FIXTURE_PATH) assert.equal(datapackageExtraPages.hash, extraPagesFixtureHash) + // log file provided in logDirectory 
option should have same hash as fixture + const datapackageLogFile = datapackage.resources.filter(entry => entry.path === 'logs/log.txt')[0] + const logFileFixtureHash = await archive.sha256(LOG_FILE_FIXTURE_PATH) + assert.equal(datapackageLogFile.hash, logFileFixtureHash) + // Delete temp file await fs.unlink(options.output) }) From 879a7858bbcbd30c0ce137fcce9ce92cabc937cb Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 16 Aug 2024 07:30:31 -0400 Subject: [PATCH 2/7] Rename logDirectory to logDir in WACZ class --- index.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/index.js b/index.js index 2bd86b3..f30dcc5 100644 --- a/index.js +++ b/index.js @@ -195,7 +195,7 @@ export class WACZ { * Path to directory of log files to copy into WACZ. * @type {?string} */ - logDirectory = null + logDir = null /** * @param {WACZOptions} options - See {@link WACZOptions} for details. @@ -347,7 +347,7 @@ export class WACZ { } if (options?.logDirectory) { - this.logDirectory = String(options?.logDirectory).trim() + this.logDir = String(options?.logDirectory).trim() } if (options?.signingToken && this.signingUrl) { @@ -408,7 +408,7 @@ export class WACZ { info('Writing WARCs to WACZ') await this.writeWARCsToZip() - if (this.logDirectory) { + if (this.logDir) { info('Writing logs to WACZ') await this.writeLogsToZip() } @@ -743,18 +743,18 @@ export class WACZ { } /** - * Streams all the files listed in `this.logDirectory` to the output ZIP. + * Streams all the files listed in `this.logDir` to the output ZIP. 
* @returns {Promise} */ writeLogsToZip = async () => { this.stateCheck() - const { logDirectory, addFileToZip, log } = this + const { logDir, addFileToZip, log } = this - const logFiles = await fs.readdir(logDirectory) + const logFiles = await fs.readdir(logDir) for (const logFile of logFiles) { - const logFilepath = resolve(this.logDirectory, logFile) + const logFilepath = resolve(this.logDir, logFile) try { await addFileToZip(logFilepath, `logs/${logFile}`) From 05451a7c01ae99f25dc8426b540dfafa03249268 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 16 Aug 2024 07:34:12 -0400 Subject: [PATCH 3/7] Only copy log files with .txt or .log extensions --- index.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/index.js b/index.js index f30dcc5..ab1e88b 100644 --- a/index.js +++ b/index.js @@ -751,11 +751,19 @@ export class WACZ { const { logDir, addFileToZip, log } = this + const allowedExts = ['log', 'txt'] + const logFiles = await fs.readdir(logDir) for (const logFile of logFiles) { const logFilepath = resolve(this.logDir, logFile) + const ext = logFilepath.toLowerCase().split('.').pop() + if (!allowedExts.includes(ext)) { + log.warn(`Skipping log file ${logFile}, not in allowed extensions (txt, log).`) + continue + } + try { await addFileToZip(logFilepath, `logs/${logFile}`) } catch (err) { From ba6644e6f0418b5b401f76a1338135167a44e837 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 16 Aug 2024 07:38:23 -0400 Subject: [PATCH 4/7] Update tests --- constants.js | 2 +- fixtures/logs/invalid.md | 3 +++ fixtures/logs/{log.txt => sample.log} | 0 index.test.js | 5 +++-- 4 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 fixtures/logs/invalid.md rename fixtures/logs/{log.txt => sample.log} (100%) diff --git a/constants.js b/constants.js index b043b77..fb58e82 100644 --- a/constants.js +++ b/constants.js @@ -50,7 +50,7 @@ export const LOG_DIR_FIXTURES_PATH = `${FIXTURES_PATH}logs${sep}` * Path to the log.txt fixture * @constant */ 
-export const LOG_FILE_FIXTURE_PATH = `${LOG_DIR_FIXTURES_PATH}log.txt` +export const LOG_FILE_FIXTURE_PATH = `${LOG_DIR_FIXTURES_PATH}sample.log` /** * Colors scheme for log level. diff --git a/fixtures/logs/invalid.md b/fixtures/logs/invalid.md new file mode 100644 index 0000000..83d666d --- /dev/null +++ b/fixtures/logs/invalid.md @@ -0,0 +1,3 @@ +# Markdown file + +This shouldn't be copied into the WACZ due to file extension. diff --git a/fixtures/logs/log.txt b/fixtures/logs/sample.log similarity index 100% rename from fixtures/logs/log.txt rename to fixtures/logs/sample.log diff --git a/index.test.js b/index.test.js index 98aae7e..50fa830 100644 --- a/index.test.js +++ b/index.test.js @@ -362,10 +362,11 @@ test('WACZ.process with pagesDir option creates valid WACZ with provided pages f const zip = new StreamZip.async({ file: options.output }) // eslint-disable-line - // File in pages fixture directory that are invalid JSONL or have wrong extension + // Files in fixtures directories that are invalid JSONL or have wrong extensions // should not be copied into the WACZ. assert.rejects(async () => await zip.entryData('pages/invalid.jsonl')) assert.rejects(async () => await zip.entryData('pages/invalid.txt')) + assert.rejects(async () => await zip.entryData('logs/invalid.md')) // pages/pages.jsonl and pages/extraPages.jsonl should have same hash as fixtures // they were copied from. 
@@ -380,7 +381,7 @@ test('WACZ.process with pagesDir option creates valid WACZ with provided pages f assert.equal(datapackageExtraPages.hash, extraPagesFixtureHash) // log file provided in logDirectory option should have same hash as fixture - const datapackageLogFile = datapackage.resources.filter(entry => entry.path === 'logs/log.txt')[0] + const datapackageLogFile = datapackage.resources.filter(entry => entry.path === 'logs/sample.log')[0] const logFileFixtureHash = await archive.sha256(LOG_FILE_FIXTURE_PATH) assert.equal(datapackageLogFile.hash, logFileFixtureHash) From 6b8c919546c96be911dd4362d189bc6ce58a2620 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 16 Aug 2024 07:41:04 -0400 Subject: [PATCH 5/7] Add logDirectory to types --- types.js | 1 + 1 file changed, 1 insertion(+) diff --git a/types.js b/types.js index 7c025ec..242f790 100644 --- a/types.js +++ b/types.js @@ -14,6 +14,7 @@ * @property {?Object} datapackageExtras - If set, will be appended to datapackage.json under `extras`. * @property {?string} cdxj - If set, skips indexing and allows for passing CDXJ files "as is". Path to a folder containing CDXJ files. * @property {?string} pages - If set, allows for passing pre-set pages.jsonl file(s). Path to a folder containing pages files (pages.jsonl, extraPages.jsonl ...). Must be used in combination with `pages`, since using this option will skip the step required to generate a pages.jsonl file. + * @property {?string} logDirectory - If set, allows for passing existing crawler log files into the WACZ. Path to a folder containing log files. Only files with log or txt extensions in that folder will be copied. * @property {?any} log - Will be used instead of the Console API for logging, if compatible (i.e: loglevel). Defaults to globalThis.console. 
*/ From 422d799f7e0253bbb98022d2989a459c9ab9384e Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 16 Aug 2024 07:43:27 -0400 Subject: [PATCH 6/7] Fix test --- index.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.test.js b/index.test.js index 50fa830..09f1772 100644 --- a/index.test.js +++ b/index.test.js @@ -189,7 +189,7 @@ test('WACZ constructor accounts for options.datapackageExtras if provided.', asy test('WACZ constructor accounts for options.logDirectory if valid.', async (_t) => { const archive = new WACZ({ input: FIXTURE_INPUT, logDirectory: LOG_DIR_FIXTURES_PATH }) - assert.equal(archive.logDirectory, LOG_DIR_FIXTURES_PATH) + assert.equal(archive.logDir, LOG_DIR_FIXTURES_PATH) }) test('addPage adds entry to pagesTree and turns detectPages off.', async (_t) => { From 3ae73b03d3f73f5e49c6b171f813574682ca3d53 Mon Sep 17 00:00:00 2001 From: Matteo Cargnelutti Date: Mon, 19 Aug 2024 11:40:18 -0400 Subject: [PATCH 7/7] Updating conventions for "dir" options This is a breaking change in the case of `pagesDir` --- bin/cli.js | 6 +++--- index.js | 12 ++++++------ index.test.js | 18 +++++++++--------- types.js | 6 +++--- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/bin/cli.js b/bin/cli.js index 3d418f6..3608e09 100755 --- a/bin/cli.js +++ b/bin/cli.js @@ -114,9 +114,9 @@ program.command('create') description: values?.desc, signingUrl: values?.signingUrl, signingToken: values?.signingToken, - pages: values?.pages, - cdxj: values?.cdxj, - logDirectory: values?.logDirectory, + pagesDir: values?.pages, + cdxjDir: values?.cdxj, + logDir: values?.logDirectory, log }) } catch (err) { diff --git a/index.js b/index.js index ab1e88b..b78ab4a 100644 --- a/index.js +++ b/index.js @@ -300,15 +300,15 @@ export class WACZ { this.indexFromWARCs = false } - if (options?.pages) { + if (options?.pagesDir) { this.detectPages = false - this.pagesDir = String(options?.pages).trim() + this.pagesDir = String(options?.pagesDir).trim() 
} - if (options?.cdxj) { + if (options?.cdxjDir) { this.detectPages = false this.indexFromWARCs = false // Added here for clarity, but implied by calls to `this.addCDXJ()` - this.cdxjDir = String(options?.cdxj).trim() + this.cdxjDir = String(options?.cdxjDir).trim() } if (options?.url) { @@ -346,8 +346,8 @@ export class WACZ { } } - if (options?.logDirectory) { - this.logDir = String(options?.logDirectory).trim() + if (options?.logDir) { + this.logDir = String(options?.logDir).trim() } if (options?.signingToken && this.signingUrl) { diff --git a/index.test.js b/index.test.js index 09f1772..b15fab5 100644 --- a/index.test.js +++ b/index.test.js @@ -74,8 +74,8 @@ test('WACZ constructor accounts for options.detectPages if valid.', async (_t) = assert.equal(archive.detectPages, false) }) -test('WACZ constructor accounts for options.pages if provided.', async (_t) => { - const archive = new WACZ({ input: FIXTURE_INPUT, pages: PAGES_DIR_FIXTURES_PATH }) +test('WACZ constructor accounts for options.pagesDir if provided.', async (_t) => { + const archive = new WACZ({ input: FIXTURE_INPUT, pagesDir: PAGES_DIR_FIXTURES_PATH }) assert.equal(archive.detectPages, false) assert.equal(archive.pagesDir, PAGES_DIR_FIXTURES_PATH) }) @@ -187,8 +187,8 @@ test('WACZ constructor accounts for options.datapackageExtras if provided.', asy assert.equal(archive.datapackageExtras, datapackageExtras) }) -test('WACZ constructor accounts for options.logDirectory if valid.', async (_t) => { - const archive = new WACZ({ input: FIXTURE_INPUT, logDirectory: LOG_DIR_FIXTURES_PATH }) +test('WACZ constructor accounts for options.logDir if valid.', async (_t) => { + const archive = new WACZ({ input: FIXTURE_INPUT, logDir: LOG_DIR_FIXTURES_PATH }) assert.equal(archive.logDir, LOG_DIR_FIXTURES_PATH) }) @@ -352,8 +352,8 @@ test('WACZ.process with pagesDir option creates valid WACZ with provided pages f url: 'https://lil.law.harvard.edu', title: 'WACZ Title', description: 'WACZ Description', - pages: 
PAGES_DIR_FIXTURES_PATH, - logDirectory: LOG_DIR_FIXTURES_PATH + pagesDir: PAGES_DIR_FIXTURES_PATH, + logDir: LOG_DIR_FIXTURES_PATH } const archive = new WACZ(options) @@ -380,7 +380,7 @@ test('WACZ.process with pagesDir option creates valid WACZ with provided pages f const extraPagesFixtureHash = await archive.sha256(EXTRA_PAGES_FIXTURE_PATH) assert.equal(datapackageExtraPages.hash, extraPagesFixtureHash) - // log file provided in logDirectory option should have same hash as fixture + // log file provided in logDir option should have same hash as fixture const datapackageLogFile = datapackage.resources.filter(entry => entry.path === 'logs/sample.log')[0] const logFileFixtureHash = await archive.sha256(LOG_FILE_FIXTURE_PATH) assert.equal(datapackageLogFile.hash, logFileFixtureHash) @@ -396,8 +396,8 @@ test('WACZ.process with cdxj option creates valid WACZ with index from provided url: 'https://lil.law.harvard.edu', title: 'WACZ Title', description: 'WACZ Description', - pages: PAGES_DIR_FIXTURES_PATH, - cdxj: CDXJ_DIR_FIXTURES_PATH + pagesDir: PAGES_DIR_FIXTURES_PATH, + cdxjDir: CDXJ_DIR_FIXTURES_PATH } const archive = new WACZ(options) diff --git a/types.js b/types.js index 242f790..94e7491 100644 --- a/types.js +++ b/types.js @@ -12,9 +12,9 @@ * @property {?string} signingUrl - If set, will be used to try and sign the resulting archive. * @property {?string} signingToken - Access token to be used in combination with `signingUrl`. * @property {?Object} datapackageExtras - If set, will be appended to datapackage.json under `extras`. - * @property {?string} cdxj - If set, skips indexing and allows for passing CDXJ files "as is". Path to a folder containing CDXJ files. - * @property {?string} pages - If set, allows for passing pre-set pages.jsonl file(s). Path to a folder containing pages files (pages.jsonl, extraPages.jsonl ...). Must be used in combination with `pages`, since using this option will skip the step required to generate a pages.jsonl file. 
- * @property {?string} logDirectory - If set, allows for passing existing crawler log files into the WACZ. Path to a folder containing log files. Only files with log or txt extensions in that folder will be copied. + * @property {?string} cdxjDir - If set, skips indexing and allows for passing CDXJ files "as is". Path to a folder containing CDXJ files. Must be used in combination with `pagesDir`, since using this option will skip the step required to generate a pages.jsonl file. + * @property {?string} pagesDir - If set, allows for passing pre-set pages.jsonl file(s). Path to a folder containing pages files (pages.jsonl, extraPages.jsonl ...). + * @property {?string} logDir - If set, allows for passing existing crawler log files into the WACZ. Path to a folder containing log files. Only files with log or txt extensions in that folder will be copied. * @property {?any} log - Will be used instead of the Console API for logging, if compatible (i.e: loglevel). Defaults to globalThis.console. */