From 568d07e40bd05a89982ee9dab1c0cfcc0065b6dd Mon Sep 17 00:00:00 2001 From: Richard Viney Date: Thu, 28 Nov 2024 19:38:09 +1300 Subject: [PATCH] Unaligned bit arrays on the JavaScript target --- CHANGELOG.md | 6 + src/gleam/bit_array.gleam | 12 +- src/gleam_stdlib.mjs | 221 ++++++++++++++++++++++++++----- test/gleam/bit_array_test.gleam | 66 ++------- test/gleam/bytes_tree_test.gleam | 2 - 5 files changed, 211 insertions(+), 96 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d110acb..b4f3d17b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +- Unaligned bit arrays on the JavaScript target are now supported by the + `bit_array` module. Note: unaligned bit arrays on JavaScript are supported + starting with Gleam v1.8. + ## v0.52.0 - 2025-01-04 - Improved the precision of `float.to_precision`. diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam index 173fab89..1da43621 100644 --- a/src/gleam/bit_array.gleam +++ b/src/gleam/bit_array.gleam @@ -13,22 +13,20 @@ pub fn from_string(x: String) -> BitArray /// Returns an integer which is the number of bits in the bit array. /// @external(erlang, "erlang", "bit_size") -pub fn bit_size(x: BitArray) -> Int { - byte_size(x) * 8 -} +@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size") +pub fn bit_size(x: BitArray) -> Int /// Returns an integer which is the number of bytes in the bit array. /// @external(erlang, "erlang", "byte_size") -@external(javascript, "../gleam_stdlib.mjs", "length") +@external(javascript, "../gleam_stdlib.mjs", "bit_array_byte_size") pub fn byte_size(x: BitArray) -> Int /// Pads a bit array with zeros so that it is a whole number of bytes. /// @external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes") -pub fn pad_to_bytes(x: BitArray) -> BitArray { - x -} +@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes") +pub fn pad_to_bytes(x: BitArray) -> BitArray /// Creates a new bit array by joining two bit arrays. /// diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index e7088b46..ca49b6c1 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -316,8 +316,64 @@ export function bit_array_from_string(string) { return toBitArray([stringBits(string)]); } +const BIT_ARRAY_UNALIGNED_SUPPORTED = + new BitArray(new Uint8Array()).bitSize !== undefined; + +export function bit_array_bit_size(bit_array) { + if (!BIT_ARRAY_UNALIGNED_SUPPORTED) { + return bit_array.length * 8; + } + + return bit_array.bitSize; +} + +export function bit_array_byte_size(bit_array) { + if (!BIT_ARRAY_UNALIGNED_SUPPORTED) { + return bit_array.length; + } + + return bit_array.byteSize; +} + +export function bit_array_pad_to_bytes(bit_array) { + const trailingBitsCount = bit_array_bit_size(bit_array) % 8; + + // If the bit array is byte aligned it can be returned unchanged + if (trailingBitsCount === 0) { + return bit_array; + } + + const finalByte = bit_array.byteAt(bit_array.byteSize - 1); + + const unusedBitsCount = 8 - trailingBitsCount; + const correctFinalByte = (finalByte >> unusedBitsCount) << unusedBitsCount; + + // If the unused bits in the final byte are already set to zero then the + // existing buffer can be re-used, avoiding a copy + if (finalByte === correctFinalByte) { + return new BitArray( + bit_array.rawBuffer, + bit_array.byteSize * 8, + bit_array.bitOffset, + ); + } + + // Copy the bit array into a new aligned buffer and set the correct final byte + const buffer = new Uint8Array(bit_array.byteSize); + for (let i = 0; i < buffer.length - 1; i++) { + buffer[i] = bit_array.byteAt(i); + } + buffer[buffer.length - 1] = correctFinalByte; + + return new BitArray(buffer); +} + export function bit_array_concat(bit_arrays) { - return toBitArray(bit_arrays.toArray().map((b) => b.buffer)); + if (BIT_ARRAY_UNALIGNED_SUPPORTED) { + return toBitArray(bit_arrays.toArray()); + } else { + return toBitArray(bit_arrays.toArray().map((b) => b.buffer)); + } } export function console_log(term) { @@ -333,9 +389,25 @@ export function crash(message) { } export function bit_array_to_string(bit_array) { + if (bit_array_bit_size(bit_array) % 8 !== 0) { + return new Error(Nil); + } + try { const decoder = new TextDecoder("utf-8", { fatal: true }); - return new Ok(decoder.decode(bit_array.buffer)); + if (BIT_ARRAY_UNALIGNED_SUPPORTED) { + if (bit_array.bitOffset === 0) { + return new Ok(decoder.decode(bit_array.rawBuffer)); + } else { + const buffer = new Uint8Array(bit_array.byteSize); + for (let i = 0; i < buffer.length; i++) { + buffer[i] = bit_array.byteAt(i); + } + return new Ok(decoder.decode(buffer)); + } + } else { + return new Ok(decoder.decode(bit_array.buffer)); + } } catch { return new Error(Nil); } @@ -415,13 +487,21 @@ export function random_uniform() { export function bit_array_slice(bits, position, length) { const start = Math.min(position, position + length); const end = Math.max(position, position + length); - if (start < 0 || end > bits.length) return new Error(Nil); - const byteOffset = bits.buffer.byteOffset + start; + + if (start < 0 || end * 8 > bit_array_bit_size(bits)) { + return new Error(Nil); + } + + if (BIT_ARRAY_UNALIGNED_SUPPORTED) { + return new Ok(bits.slice(start * 8, end * 8)); + } + const buffer = new Uint8Array( bits.buffer.buffer, - byteOffset, - Math.abs(length), + bits.buffer.byteOffset + start, + end - start, ); + return new Ok(new BitArray(buffer)); } @@ -522,16 +602,20 @@ let b64TextDecoder; export function encode64(bit_array, padding) { b64TextDecoder ??= new TextDecoder(); - const bytes = bit_array.buffer; + bit_array = bit_array_pad_to_bytes(bit_array); - const m = bytes.length; + const m = bit_array_byte_size(bit_array); const k = m % 3; const n = Math.floor(m / 3) * 4 + (k && k + 1); const N = Math.ceil(m / 3) * 4; const encoded = new Uint8Array(N); for (let i = 0, j = 0; j < m; i += 4, j += 3) { - const y = (bytes[j] << 16) + (bytes[j + 1] << 8) + (bytes[j + 2] | 0); + const y = + (bit_array.byteAt(j) << 16) + + (bit_array.byteAt(j + 1) << 8) + + (bit_array.byteAt(j + 2) | 0); + encoded[i] = b64EncodeLookup[y >> 18]; encoded[i + 1] = b64EncodeLookup[(y >> 12) & 0x3f]; encoded[i + 2] = b64EncodeLookup[(y >> 6) & 0x3f]; @@ -804,7 +888,7 @@ export function inspect(v) { if (Array.isArray(v)) return `#(${v.map(inspect).join(", ")})`; if (v instanceof List) return inspectList(v); if (v instanceof UtfCodepoint) return inspectUtfCodepoint(v); - if (v instanceof BitArray) return inspectBitArray(v); + if (v instanceof BitArray) return `<<${bit_array_inspect(v, "")}>>`; if (v instanceof CustomType) return inspectCustomType(v); if (v instanceof Dict) return inspectDict(v); if (v instanceof Set) return `//js(Set(${[...v].map(inspect).join(", ")}))`; @@ -895,17 +979,22 @@ export function inspectList(list) { return `[${list.toArray().map(inspect).join(", ")}]`; } -export function inspectBitArray(bits) { - return `<<${Array.from(bits.buffer).join(", ")}>>`; -} - export function inspectUtfCodepoint(codepoint) { return `//utfcodepoint(${String.fromCodePoint(codepoint.value)})`; } export function base16_encode(bit_array) { + const trailingBitsCount = bit_array_bit_size(bit_array) % 8; + let result = ""; - for (const byte of bit_array.buffer) { + for (let i = 0; i < bit_array.byteSize; i++) { + let byte = bit_array.byteAt(i); + + if (i === bit_array.byteSize - 1 && trailingBitsCount) { + const unusedBitsCount = 8 - trailingBitsCount; + byte = (byte >> unusedBitsCount) << unusedBitsCount; + } + result += byte.toString(16).padStart(2, "0").toUpperCase(); } return result; @@ -923,38 +1012,108 @@ export function base16_decode(string) { } export function bit_array_inspect(bits, acc) { - return `${acc}${[...bits.buffer].join(", ")}`; + const bitSize = bit_array_bit_size(bits); + + if (bitSize === 0) { + return acc; + } + + const byteSize = bit_array_byte_size(bits); + + for (let i = 0; i < byteSize - 1; i++) { + acc += bits.byteAt(i).toString(); + acc += ", "; + } + + if (byteSize * 8 === bitSize) { + acc += bits.byteAt(byteSize - 1).toString(); + } else { + const trailingBitsCount = bitSize % 8; + acc += bits.byteAt(byteSize - 1) >> (8 - trailingBitsCount); + acc += `:size(${trailingBitsCount})`; + } + + return acc; } export function bit_array_compare(first, second) { - for (let i = 0; i < first.length; i++) { - if (i >= second.length) { - return new Gt(); // first has more items - } - const f = first.buffer[i]; - const s = second.buffer[i]; + let i = 0; + + let firstSize = bit_array_bit_size(first); + let secondSize = bit_array_bit_size(second); + + while (firstSize >= 8 && secondSize >= 8) { + const f = first.byteAt(i); + const s = second.byteAt(i); + if (f > s) { return new Gt(); - } - if (f < s) { + } else if (f < s) { return new Lt(); } + + i++; + firstSize -= 8; + secondSize -= 8; } - // This means that either first did not have any items - // or all items in first were equal to second. - if (first.length === second.length) { + + if (firstSize === 0 && secondSize === 0) { return new Eq(); } - return new Lt(); // second has more items + + // First has more items, example: "AB" > "A": + if (secondSize === 0) { + return new Gt(); + } + + // Second has more items, example: "A" < "AB": + if (firstSize === 0) { + return new Lt(); + } + + // This happens when there are unaligned bit arrays + + const f = first.byteAt(i) >> (8 - firstSize); + const s = second.byteAt(i) >> (8 - secondSize); + + if (f > s) { + return new Gt(); + } + if (f < s) { + return new Lt(); + } + if (firstSize > secondSize) { + return new Gt(); + } + if (firstSize < secondSize) { + return new Lt(); + } + + return new Eq(); } export function bit_array_starts_with(bits, prefix) { - if (prefix.length > bits.length) { + const prefixSize = bit_array_bit_size(prefix); + + if (prefixSize > bit_array_bit_size(bits)) { return false; } - for (let i = 0; i < prefix.length; i++) { - if (bits.buffer[i] !== prefix.buffer[i]) { + // Check any whole bytes + const byteCount = Math.trunc(prefixSize / 8); + for (let i = 0; i < byteCount; i++) { + if (bits.byteAt(i) !== prefix.byteAt(i)) { + return false; + } + } + + // Check any trailing bits at the end of the prefix + if (prefixSize % 8 !== 0) { + const unusedBitsCount = 8 - (prefixSize % 8); + if ( + bits.byteAt(byteCount) >> unusedBitsCount !== + prefix.byteAt(byteCount) >> unusedBitsCount + ) { return false; } } diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 30e8c17d..1ec1f660 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -16,12 +16,7 @@ pub fn bit_size_test() { bit_array.bit_size(<<0:-8>>) |> should.equal(0) -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn bit_size_erlang_only_test() { bit_array.bit_size(<<0:1>>) |> should.equal(1) @@ -41,12 +36,7 @@ pub fn byte_size_test() { bit_array.byte_size(<<0, 1, 2, 3, 4>>) |> should.equal(5) -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn byte_size_erlang_only_test() { bit_array.byte_size(<<1, 2, 3:6>>) |> should.equal(3) } @@ -63,12 +53,7 @@ pub fn pad_to_bytes_test() { <<0xAB, 0x12>> |> bit_array.pad_to_bytes |> should.equal(<<0xAB, 0x12>>) -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn pad_to_bytes_erlang_only_test() { <<1:1>> |> bit_array.pad_to_bytes |> should.equal(<<0x80>>) @@ -80,6 +65,11 @@ pub fn pad_to_bytes_erlang_only_test() { <<0xAB, 0x12, 3:3>> |> bit_array.pad_to_bytes |> should.equal(<<0xAB, 0x12, 0x60>>) + + let assert <> = <<0xAB, 0xFF>> + a + |> bit_array.pad_to_bytes + |> should.equal(<<0xAB, 0xF0>>) } pub fn not_equal_test() { @@ -99,12 +89,7 @@ pub fn append_test() { <<1, 2>> |> bit_array.append(<<3, 4>>) |> should.equal(<<1, 2, 3, 4>>) -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn append_erlang_only_test() { <<1, 2:4>> |> bit_array.append(<<3>>) |> should.equal(<<1, 2:4, 3>>) @@ -118,12 +103,7 @@ pub fn concat_test() { [<<1, 2>>, <<3>>, <<4>>] |> bit_array.concat |> should.equal(<<1, 2, 3, 4>>) -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn concat_erlang_only_test() { [<<-1:32>>, <<0:1>>, <<0:0>>] |> bit_array.concat |> should.equal(<<255, 255, 255, 255, 0:1>>) @@ -186,12 +166,7 @@ pub fn slice_test() { |> bit_array.slice(1, 1) |> result.try(bit_array.slice(_, 0, 1)) |> should.equal(Ok(<<"b":utf8>>)) -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn slice_erlang_only_test() { <<0, 1, 2:7>> |> bit_array.slice(0, 3) |> should.equal(Error(Nil)) @@ -221,15 +196,15 @@ pub fn to_string_test() { <<65_535>> |> bit_array.to_string |> should.equal(Error(Nil)) -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn to_string_erlang_only_test() { <<"ø":utf8, 50:4>> |> bit_array.to_string |> should.equal(Error(Nil)) + + let assert <<_:3, x:bits>> = <<0:3, "ø":utf8>> + x + |> bit_array.to_string + |> should.equal(Ok("ø")) } pub fn is_utf8_test() { @@ -282,12 +257,7 @@ pub fn base64_encode_test() { "QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB", 1024 * 32, )) -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn base64_erlang_only_encode_test() { <<-1:7>> |> bit_array.base64_encode(True) |> should.equal("/g==") @@ -397,12 +367,7 @@ pub fn base16_test() { bit_array.base16_encode(<<161, 178, 195, 212, 229, 246, 120, 145>>) |> should.equal("A1B2C3D4E5F67891") -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn base16_encode_erlang_only_test() { <<-1:7>> |> bit_array.base16_encode() |> should.equal("FE") @@ -463,12 +428,7 @@ pub fn inspect_test() { bit_array.inspect(<<0, 20, 0x20, 255>>) |> should.equal("<<0, 20, 32, 255>>") -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn inspect_erlang_only_test() { bit_array.inspect(<<4:5>>) |> should.equal("<<4:size(5)>>") @@ -479,7 +439,6 @@ pub fn inspect_erlang_only_test() { |> should.equal("<<182, 1:size(1)>>") } -@target(erlang) pub fn compare_test() { bit_array.compare(<<4:5>>, <<4:5>>) |> should.equal(order.Eq) @@ -550,12 +509,7 @@ pub fn starts_with_test() { bit_array.starts_with(<<0, 1, 2>>, <<1>>) |> should.be_false -} -// This test is target specific since it's using non byte-aligned BitArrays -// and those are not supported on the JavaScript target. -@target(erlang) -pub fn starts_with_erlang_only_test() { bit_array.starts_with(<<1:1>>, <<1:1>>) |> should.be_true diff --git a/test/gleam/bytes_tree_test.gleam b/test/gleam/bytes_tree_test.gleam index 3f549f15..e7dd0f07 100644 --- a/test/gleam/bytes_tree_test.gleam +++ b/test/gleam/bytes_tree_test.gleam @@ -18,7 +18,6 @@ pub fn tree_test() { |> should.equal(4) } -@target(erlang) pub fn tree_unaligned_bit_arrays_test() { let data = bytes_tree.from_bit_array(<<-1:5>>) @@ -84,7 +83,6 @@ pub fn concat_bit_arrays_test() { |> should.equal(<<"hey":utf8>>) } -@target(erlang) pub fn concat_unaligned_bit_arrays_test() { bytes_tree.concat_bit_arrays([<<-1:4>>, <<-1:5>>, <<-1:3>>, <<-2:2>>]) |> bytes_tree.to_bit_array