Skip to content

Commit

Permalink
fix for direct fetch timeouts (#677)
Browse files Browse the repository at this point in the history
- use '--timeout' value for direct fetch timeout, instead of a fixed 30
seconds
- don't consider 'document' as essential resource regardless of mime
type, as any top-level URL is a document
- don't count non-200 responses as non-essential even if missing
content-type

fixes #676
  • Loading branch information
ikreymer authored Sep 5, 2024
1 parent 85a07af commit 0d6a0b0
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,7 @@ self.__bx_behaviors.selectMainBehavior();
try {
const { fetched, mime, ts } = await timedRun(
directFetchCapture({ url, headers: this.headers, cdp }),
FETCH_TIMEOUT_SECS,
this.params.pageLoadTimeout,
"Direct fetch capture attempt timed out",
logDetails,
"fetch",
Expand Down
4 changes: 3 additions & 1 deletion src/util/recorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -670,8 +670,10 @@ export class Recorder {

// if contentLength is large or unknown, do streaming, unless it's an essential resource
// in which case, need to do a full fetch either way
// don't count non-200 responses which may not have content-length
if (
(contentLen < 0 || contentLen > MAX_BROWSER_DEFAULT_FETCH_SIZE) &&
responseStatusCode === 200 &&
!this.isEssentialResource(reqresp.resourceType, mimeType)
) {
const opts: ResponseStreamAsyncFetchOptions = {
Expand Down Expand Up @@ -1030,7 +1032,7 @@ export class Recorder {
}

isEssentialResource(resourceType: string | undefined, contentType: string) {
if (["document", "stylesheet", "script"].includes(resourceType || "")) {
if (resourceType === "script" || resourceType === "stylesheet") {
return true;
}

Expand Down

0 comments on commit 0d6a0b0

Please sign in to comment.