Skip to content

Commit

Permalink
fix redirect handling:
Browse files Browse the repository at this point in the history
- allow recreating reqresp with same requestId for redirects, to store properly final request/response of redirect chain
- add isSelfRedirect() to check self-redirect responses, skip writing those
- logging: add networkId, requestId to fetch request error logging
- puppeteer-core: bump to latest (21.1.0)
  • Loading branch information
ikreymer committed Aug 26, 2023
1 parent 60aec17 commit 759f950
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 32 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"js-yaml": "^4.1.0",
"minio": "7.0.26",
"p-queue": "^7.3.4",
"puppeteer-core": "^20.9.0",
"puppeteer-core": "^21.1.0",
"sharp": "^0.32.1",
"sitemapper": "^3.1.2",
"uuid": "8.3.2",
Expand Down
24 changes: 17 additions & 7 deletions util/recorder.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ export class Recorder
// Fetch

cdp.on("Fetch.requestPaused", async (params) => {
logNetwork("Fetch.requestPaused", {requestId: params.requestId, ...this.logDetails});
this.handleRequestPaused(params, cdp);
});

Expand Down Expand Up @@ -170,13 +169,20 @@ export class Recorder
handleRedirectResponse(params) {
const { requestId, redirectResponse } = params;

// remove and serialize now as may redirect chain may reuse same requestId
const reqresp = this.removeReqResp(requestId);
// remove and serialize, but allow reusing requestId
// as redirect chain may reuse same requestId for subsequent request
const reqresp = this.removeReqResp(requestId, true);
if (!reqresp) {
return;
}

reqresp.fillResponse(redirectResponse);

if (reqresp.isSelfRedirect()) {
logger.warn("Skipping self redirect", {url: reqresp. url, status: reqresp.status, ...this.logDetails}, "recorder");
return;
}

this.serializeToWARC(reqresp);
}

Expand Down Expand Up @@ -236,6 +242,8 @@ export class Recorder
const { requestId, request, responseStatusCode, responseErrorReason, resourceType, networkId } = params;
const { method, headers, url } = request;

logNetwork("Fetch.requestPaused", {requestId, networkId, url, ...this.logDetails});

let continued = false;

try {
Expand All @@ -250,7 +258,7 @@ export class Recorder
try {
await cdp.send("Fetch.continueResponse", {requestId});
} catch (e) {
logger.debug("continueResponse failed", {url, ...errJSON(e), ...this.logDetails}, "recorder");
logger.debug("continueResponse failed", {requestId, networkId, url, ...errJSON(e), ...this.logDetails}, "recorder");
}
}
}
Expand Down Expand Up @@ -326,7 +334,7 @@ export class Recorder
reqresp.payload = Buffer.from(body, base64Encoded ? "base64" : "utf-8");
logNetwork("Fetch done", {size: reqresp.payload.length, url, networkId, ...this.logDetails});
} catch (e) {
logger.warn("Failed to load response body", {...errJSON(e), url, ...this.logDetails}, "recorder");
logger.warn("Failed to load response body", {url, networkId, ...errJSON(e), ...this.logDetails}, "recorder");
return false;
}
}
Expand Down Expand Up @@ -570,10 +578,12 @@ export class Recorder
}
}

removeReqResp(requestId) {
removeReqResp(requestId, allowReuse=false) {
const reqresp = this.pendingRequests.get(requestId);
this.pendingRequests.delete(requestId);
this.skipIds.add(requestId);
if (!allowReuse) {
this.skipIds.add(requestId);
}
return reqresp;
}

Expand Down
13 changes: 13 additions & 0 deletions util/reqresp.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,19 @@ export class RequestResponseInfo
}
}

isSelfRedirect() {
if (this.status < 300 || this.status >= 400 || this.status === 304) {
return false;
}
try {
const headers = new Headers(this.responseHeaders);
const redirUrl = new URL(headers.get("location"), this.url).href;
return this.url === redirUrl;
} catch (e) {
return false;
}
}

fillResponseReceivedExtraInfo(params) {
// this.responseHeaders = params.headers;
// if (params.headersText) {
Expand Down
48 changes: 24 additions & 24 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -794,10 +794,10 @@
tslib "^2.4.1"
tsyringe "^4.7.0"

"@puppeteer/browsers@1.4.6":
version "1.4.6"
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-1.4.6.tgz#1f70fd23d5d2ccce9d29b038e5039d7a1049ca77"
integrity sha512-x4BEjr2SjOPowNeiguzjozQbsc6h437ovD/wu+JpaenxVLm3jkgzHY2xOslMTp50HoTvQreMjiexiGQw1sqZlQ==
"@puppeteer/browsers@1.7.0":
version "1.7.0"
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-1.7.0.tgz#714a25ad6963f5478e36004ea7eda254870a4659"
integrity sha512-sl7zI0IkbQGak/+IE3VEEZab5SSOlI5F6558WvzWGC1n3+C722rfewC1ZIkcF9dsoGSsxhsONoseVlNQG4wWvQ==
dependencies:
debug "4.3.4"
extract-zip "2.0.1"
Expand Down Expand Up @@ -1486,12 +1486,12 @@ chownr@^1.1.1:
resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==

[email protected].16:
version "0.4.16"
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.16.tgz#8a67bfdf6bb8804efc22765a82859d20724b46ab"
integrity sha512-7ZbXdWERxRxSwo3txsBjjmc/NLxqb1Bk30mRb0BMS4YIaiV6zvKZqL/UAH+DdqcDYayDWk2n/y8klkBDODrPvA==
[email protected].20:
version "0.4.20"
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.20.tgz#1cd56426638452b40b29b7054e83c379e7e2b20a"
integrity sha512-ruHgVZFEv00mAQMz1tQjfjdG63jiPWrQPF6HLlX2ucqLqVTJoWngeBEKHaJ6n1swV/HSvgnBNbtTRIlcVyW3Fw==
dependencies:
mitt "3.0.0"
mitt "3.0.1"

ci-info@^3.2.0:
version "3.5.0"
Expand Down Expand Up @@ -1760,10 +1760,10 @@ detect-newline@^3.0.0:
resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==

[email protected].1147663:
version "0.0.1147663"
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1147663.tgz#4ec5610b39a6250d1f87e6b9c7e16688ed0ac78e"
integrity sha512-hyWmRrexdhbZ1tcJUGpO95ivbRhWXz++F4Ko+n21AY5PNln2ovoJw+8ZMNDTtip+CNFQfrtLVh/w4009dXO/eQ==
[email protected].1159816:
version "0.0.1159816"
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1159816.tgz#b5848e8597de01e4738589e7553674c7312c8d2a"
integrity sha512-2cZlHxC5IlgkIWe2pSDmCrDiTzbSJWywjbDDnupOImEBcG31CQgBLV8wWE+5t+C4rimcjHsbzy7CBzf9oFjboA==

diff-sequences@^29.2.0:
version "29.2.0"
Expand Down Expand Up @@ -3485,10 +3485,10 @@ [email protected]:
xml "^1.0.0"
xml2js "^0.4.15"

[email protected].0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/mitt/-/mitt-3.0.0.tgz#69ef9bd5c80ff6f57473e8d89326d01c414be0bd"
integrity sha512-7dX2/10ITVyqh4aOSVI9gdape+t9l2/8QxHrFmUXu4EEUpdlxl6RudZUPZoc+zuY2hk1j7XxVroIVIan/pD/SQ==
[email protected].1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/mitt/-/mitt-3.0.1.tgz#ea36cf0cc30403601ae074c8f77b7092cdab36d1"
integrity sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==

mkdirp-classic@^0.5.2, mkdirp-classic@^0.5.3:
version "0.5.3"
Expand Down Expand Up @@ -3968,16 +3968,16 @@ punycode@^2.1.0:
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==

puppeteer-core@^20.9.0:
version "20.9.0"
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-20.9.0.tgz#6f4b420001b64419deab38d398a4d9cd071040e6"
integrity sha512-H9fYZQzMTRrkboEfPmf7m3CLDN6JvbxXA3qTtS+dFt27tR+CsFHzPsT6pzp6lYL6bJbAPaR0HaPO6uSi+F94Pg==
puppeteer-core@^21.1.0:
version "21.1.0"
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-21.1.0.tgz#f7680ed17076fba6a721f9b81fc045a8351bb8b3"
integrity sha512-ggfTj09jo81Y6M4DyNj80GrY6Pip+AtDUgGljqoSzP6FG5nz5Aju6Cs/X147fLgkJ4UKTb736U6cDp0ssLzN5Q==
dependencies:
"@puppeteer/browsers" "1.4.6"
chromium-bidi "0.4.16"
"@puppeteer/browsers" "1.7.0"
chromium-bidi "0.4.20"
cross-fetch "4.0.0"
debug "4.3.4"
devtools-protocol "0.0.1147663"
devtools-protocol "0.0.1159816"
ws "8.13.0"

pvtsutils@^1.3.2:
Expand Down

0 comments on commit 759f950

Please sign in to comment.