Skip to content

Commit

Permalink
encoding fix: when BOM marker is detected, re-encode text as UTF-8 and add 'charset=utf-8' to content-type to ensure correct parsing
Browse files Browse the repository at this point in the history

fixes #178
  • Loading branch information
ikreymer committed Jun 17, 2024
1 parent ef31628 commit 6d4139f
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 10 deletions.
2 changes: 1 addition & 1 deletion dist/sw.js

Large diffs are not rendered by default.

19 changes: 11 additions & 8 deletions src/response.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,25 +95,28 @@ class ArchiveResponse
async getText(isUTF8 = false) {
let buff = await this.getBuffer();
if (typeof(buff) === "string") {
return buff;
return {bomFound: false, text: buff};
}

// Check for BOMs
// Check for BOMs -- since we're removing BOM, set 'bomFound'
// to re-encode as UTF-8 without BOM
// UTF-8
if (buff[0] === 0xEF && buff[1] === 0xBB && buff[2] === 0xBF) {
return decoder.decode(buff.slice(3));
return {bomFound: true, text: decoder.decode(buff.slice(3))};
// UTF-16BE -- convert to buffer, swap, and decode LE
} else if (buff[0] === 0xFE && buff[1] === 0xFF) {
return Buffer.from(buff.slice(2)).swap16().toString("utf16le");
return {bomFound: true, text: Buffer.from(buff.slice(2)).swap16().toString("utf16le")};
// UTF-16LE -- convert to buffer, decode LE
} else if (buff[0] === 0xFF && buff[1] === 0xFE) {
return Buffer.from(buff.slice(2)).toString("utf16le");
return {bomFound: true, text: Buffer.from(buff.slice(2)).toString("utf16le")};
}

return isUTF8 ? decoder.decode(buff) : decodeLatin1(buff);
// if no BOM, go by 'isUTF8' param
return {bomFound: false, text: isUTF8 ? decoder.decode(buff) : decodeLatin1(buff)};
}

setText(text, isUTF8 = false) {
this.setBuffer(isUTF8 ? encoder.encode(text) : encodeLatin1(text));
setText(text, encodeUTF8 = false) {
this.setBuffer(encodeUTF8 ? encoder.encode(text) : encodeLatin1(text));
}

async getBuffer() {
Expand Down
12 changes: 11 additions & 1 deletion src/rewrite/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,18 @@ class Rewriter {
}

if (rwFunc) {
let text = await response.getText(this.isCharsetUTF8);
let {bomFound, text} = await response.getText(this.isCharsetUTF8);
text = rwFunc.call(this, text, opts);
// if BOM found and not already UTF-8, add charset explicitly
if (bomFound && !this.isCharsetUTF8) {
let mime = headers.get("Content-Type") || "";
const parts = mime.split(";");
mime = parts[0];
if (mime) {
headers.set("Content-Type", mime + "; charset=utf-8");
}
this.isCharsetUTF8 = true;
}
response.setText(text, this.isCharsetUTF8);
}

Expand Down

0 comments on commit 6d4139f

Please sign in to comment.