Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unaligned bit arrays on the JavaScript target #761

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

- Unaligned bit arrays on the JavaScript target are now supported by the
`bit_array` module. Note: this requires Gleam v1.8 or later, the first version
in which unaligned bit arrays are supported on JavaScript.

## v0.52.0 - 2025-01-04

- Improved the precision of `float.to_precision`.
Expand Down
12 changes: 5 additions & 7 deletions src/gleam/bit_array.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,20 @@ pub fn from_string(x: String) -> BitArray
/// Returns an integer which is the number of bits in the bit array.
///
@external(erlang, "erlang", "bit_size")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size")
pub fn bit_size(x: BitArray) -> Int

/// Returns an integer which is the number of bytes in the bit array.
///
@external(erlang, "erlang", "byte_size")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_byte_size")
pub fn byte_size(x: BitArray) -> Int

/// Pads a bit array with zeros so that it is a whole number of bytes.
///
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes")
pub fn pad_to_bytes(x: BitArray) -> BitArray

/// Creates a new bit array by joining two bit arrays.
///
Expand Down
221 changes: 190 additions & 31 deletions src/gleam_stdlib.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,64 @@ export function bit_array_from_string(string) {
return toBitArray([stringBits(string)]);
}

// Feature flag: true when an instance of the `BitArray` class in scope exposes
// a `bitSize` property. Functions in this module use it to choose between the
// `bitSize`/`byteSize` API and the older `length`/`buffer` API.
const BIT_ARRAY_UNALIGNED_SUPPORTED =
  typeof new BitArray(new Uint8Array()).bitSize !== "undefined";

// Returns the number of bits in `bit_array`.
//
// Reads `bitSize` when the unaligned-capable `BitArray` is available; otherwise
// derives the count from `length`, treating every element as a full byte.
export function bit_array_bit_size(bit_array) {
  return BIT_ARRAY_UNALIGNED_SUPPORTED
    ? bit_array.bitSize
    : bit_array.length * 8;
}

// Returns the number of bytes in `bit_array`.
//
// Reads `byteSize` when the unaligned-capable `BitArray` is available;
// otherwise falls back to `length`.
export function bit_array_byte_size(bit_array) {
  return BIT_ARRAY_UNALIGNED_SUPPORTED ? bit_array.byteSize : bit_array.length;
}

// Returns a bit array holding a whole number of bytes, with any bits added to
// complete the final byte set to zero.
//
// A bit array whose size is already a multiple of 8 is returned as-is.
export function bit_array_pad_to_bytes(bit_array) {
  const danglingBits = bit_array_bit_size(bit_array) % 8;
  if (danglingBits === 0) {
    return bit_array;
  }

  const byteCount = bit_array.byteSize;
  const lastIndex = byteCount - 1;
  const lastByte = bit_array.byteAt(lastIndex);

  // Shifting right then left by the number of unused low bits clears them.
  const padWidth = 8 - danglingBits;
  const clearedLastByte = (lastByte >> padWidth) << padWidth;

  // The unused bits are already zero: wrap the same raw buffer with a size
  // rounded up to whole bytes instead of copying.
  if (clearedLastByte === lastByte) {
    return new BitArray(
      bit_array.rawBuffer,
      byteCount * 8,
      bit_array.bitOffset,
    );
  }

  // Otherwise copy every byte except the last, then store the cleared last
  // byte.
  const padded = new Uint8Array(byteCount);
  for (let index = 0; index < lastIndex; index += 1) {
    padded[index] = bit_array.byteAt(index);
  }
  padded[lastIndex] = clearedLastByte;

  return new BitArray(padded);
}

// Joins a list of bit arrays into a single bit array.
//
// `bit_arrays` must provide `toArray()`. When the unaligned-capable `BitArray`
// is available the bit arrays are handed to `toBitArray` directly; otherwise
// each one's `buffer` is passed instead.
export function bit_array_concat(bit_arrays) {
  const segments = bit_arrays.toArray();

  if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
    return toBitArray(segments);
  }

  return toBitArray(segments.map((b) => b.buffer));
}

export function console_log(term) {
Expand All @@ -333,9 +389,25 @@ export function crash(message) {
}

export function bit_array_to_string(bit_array) {
if (bit_array_bit_size(bit_array) % 8 !== 0) {
return new Error(Nil);
}

try {
const decoder = new TextDecoder("utf-8", { fatal: true });
return new Ok(decoder.decode(bit_array.buffer));
if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
if (bit_array.bitOffset === 0) {
return new Ok(decoder.decode(bit_array.rawBuffer));
} else {
const buffer = new Uint8Array(bit_array.byteSize);
for (let i = 0; i < buffer.length; i++) {
buffer[i] = bit_array.byteAt(i);
}
return new Ok(decoder.decode(buffer));
}
} else {
return new Ok(decoder.decode(bit_array.buffer));
}
} catch {
return new Error(Nil);
}
Expand Down Expand Up @@ -415,13 +487,21 @@ export function random_uniform() {
// Extracts `length` bytes of `bits` starting at byte `position`.
//
// A negative `length` selects the bytes that end at `position`. Returns
// `Error(Nil)` when the requested range starts before 0 or extends past the
// end of the bit array, and `Ok(slice)` otherwise.
export function bit_array_slice(bits, position, length) {
  // Normalise so that start <= end regardless of the sign of `length`.
  const start = Math.min(position, position + length);
  const end = Math.max(position, position + length);

  // Bounds are checked in bits so the check also holds for a bit array whose
  // size is not a whole number of bytes.
  if (start < 0 || end * 8 > bit_array_bit_size(bits)) {
    return new Error(Nil);
  }

  if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
    return new Ok(bits.slice(start * 8, end * 8));
  }

  // Legacy path: create a view over the same underlying ArrayBuffer.
  const buffer = new Uint8Array(
    bits.buffer.buffer,
    bits.buffer.byteOffset + start,
    end - start,
  );

  return new Ok(new BitArray(buffer));
}

Expand Down Expand Up @@ -522,16 +602,20 @@ let b64TextDecoder;
export function encode64(bit_array, padding) {
b64TextDecoder ??= new TextDecoder();

const bytes = bit_array.buffer;
bit_array = bit_array_pad_to_bytes(bit_array);

const m = bytes.length;
const m = bit_array_byte_size(bit_array);
const k = m % 3;
const n = Math.floor(m / 3) * 4 + (k && k + 1);
const N = Math.ceil(m / 3) * 4;
const encoded = new Uint8Array(N);

for (let i = 0, j = 0; j < m; i += 4, j += 3) {
const y = (bytes[j] << 16) + (bytes[j + 1] << 8) + (bytes[j + 2] | 0);
const y =
(bit_array.byteAt(j) << 16) +
(bit_array.byteAt(j + 1) << 8) +
(bit_array.byteAt(j + 2) | 0);

encoded[i] = b64EncodeLookup[y >> 18];
encoded[i + 1] = b64EncodeLookup[(y >> 12) & 0x3f];
encoded[i + 2] = b64EncodeLookup[(y >> 6) & 0x3f];
Expand Down Expand Up @@ -804,7 +888,7 @@ export function inspect(v) {
if (Array.isArray(v)) return `#(${v.map(inspect).join(", ")})`;
if (v instanceof List) return inspectList(v);
if (v instanceof UtfCodepoint) return inspectUtfCodepoint(v);
if (v instanceof BitArray) return inspectBitArray(v);
if (v instanceof BitArray) return `<<${bit_array_inspect(v, "")}>>`;
if (v instanceof CustomType) return inspectCustomType(v);
if (v instanceof Dict) return inspectDict(v);
if (v instanceof Set) return `//js(Set(${[...v].map(inspect).join(", ")}))`;
Expand Down Expand Up @@ -895,17 +979,22 @@ export function inspectList(list) {
return `[${list.toArray().map(inspect).join(", ")}]`;
}

// Renders the elements of `bits.buffer` as a comma-separated list wrapped in
// `<<` and `>>`.
export function inspectBitArray(bits) {
  const bytes = [...bits.buffer];
  return "<<" + bytes.join(", ") + ">>";
}

// Renders a codepoint as `//utfcodepoint(<char>)`, where `<char>` is the
// string for the code point number stored in `codepoint.value`.
export function inspectUtfCodepoint(codepoint) {
  const character = String.fromCodePoint(codepoint.value);
  return "//utfcodepoint(" + character + ")";
}

export function base16_encode(bit_array) {
const trailingBitsCount = bit_array_bit_size(bit_array) % 8;

let result = "";
for (const byte of bit_array.buffer) {
for (let i = 0; i < bit_array.byteSize; i++) {
let byte = bit_array.byteAt(i);

if (i === bit_array.byteSize - 1 && trailingBitsCount) {
const unusedBitsCount = 8 - trailingBitsCount;
byte = (byte >> unusedBitsCount) << unusedBitsCount;
}

result += byte.toString(16).padStart(2, "0").toUpperCase();
}
return result;
Expand All @@ -923,38 +1012,108 @@ export function base16_decode(string) {
}

// Appends a textual rendering of the contents of `bits` to `acc` and returns
// the resulting string.
//
// Bytes are written as decimal numbers separated by ", ". If the bit array
// does not end on a byte boundary, the trailing bits are written as
// `<value>:size(<bit count>)`. An empty bit array leaves `acc` unchanged.
export function bit_array_inspect(bits, acc) {
  const bitSize = bit_array_bit_size(bits);

  if (bitSize === 0) {
    return acc;
  }

  const byteSize = bit_array_byte_size(bits);

  // Every byte except the last is followed by a separator.
  for (let i = 0; i < byteSize - 1; i++) {
    acc += bits.byteAt(i).toString();
    acc += ", ";
  }

  if (byteSize * 8 === bitSize) {
    // The final byte is complete.
    acc += bits.byteAt(byteSize - 1).toString();
  } else {
    // The final byte is partial: shift out its unused low bits so that only
    // the value of the trailing bits is printed.
    const trailingBitsCount = bitSize % 8;
    acc += bits.byteAt(byteSize - 1) >> (8 - trailingBitsCount);
    acc += `:size(${trailingBitsCount})`;
  }

  return acc;
}

// Orders two bit arrays, returning a `Gt`, `Lt` or `Eq` value.
//
// Whole bytes are compared pairwise from the start and the first differing
// byte decides the result. If all compared bytes match, a bit array that has
// no bits left sorts before one that still has bits. When both still have bits
// left, the leading bits of the next byte are compared as integers, and a tie
// is broken by the number of remaining bits.
export function bit_array_compare(first, second) {
  let i = 0;

  let firstSize = bit_array_bit_size(first);
  let secondSize = bit_array_bit_size(second);

  // Compare byte by byte while both sides still have a whole byte available.
  while (firstSize >= 8 && secondSize >= 8) {
    const f = first.byteAt(i);
    const s = second.byteAt(i);

    if (f > s) {
      return new Gt();
    } else if (f < s) {
      return new Lt();
    }

    i++;
    firstSize -= 8;
    secondSize -= 8;
  }

  if (firstSize === 0 && secondSize === 0) {
    return new Eq();
  }

  // First has more items, example: "AB" > "A":
  if (secondSize === 0) {
    return new Gt();
  }

  // Second has more items, example: "A" < "AB":
  if (firstSize === 0) {
    return new Lt();
  }

  // This happens when there are unaligned bit arrays: at least one side has
  // fewer than 8 bits left, held in the high bits of byte `i`.
  //
  // NOTE(review): if one side still has 8 or more bits left here, `8 - size`
  // is zero or negative. JavaScript uses only the low 5 bits of a shift count,
  // so a negative width does not isolate that side's leading bits. Confirm the
  // intended ordering for that case.
  const f = first.byteAt(i) >> (8 - firstSize);
  const s = second.byteAt(i) >> (8 - secondSize);

  if (f > s) {
    return new Gt();
  }
  if (f < s) {
    return new Lt();
  }
  if (firstSize > secondSize) {
    return new Gt();
  }
  if (firstSize < secondSize) {
    return new Lt();
  }

  return new Eq();
}

export function bit_array_starts_with(bits, prefix) {
if (prefix.length > bits.length) {
const prefixSize = bit_array_bit_size(prefix);

if (prefixSize > bit_array_bit_size(bits)) {
return false;
}

for (let i = 0; i < prefix.length; i++) {
if (bits.buffer[i] !== prefix.buffer[i]) {
// Check any whole bytes
const byteCount = Math.trunc(prefixSize / 8);
for (let i = 0; i < byteCount; i++) {
if (bits.byteAt(i) !== prefix.byteAt(i)) {
return false;
}
}

// Check any trailing bits at the end of the prefix
if (prefixSize % 8 !== 0) {
const unusedBitsCount = 8 - (prefixSize % 8);
if (
bits.byteAt(byteCount) >> unusedBitsCount !==
prefix.byteAt(byteCount) >> unusedBitsCount
) {
return false;
}
}
Expand Down
Loading
Loading