Skip to content

Commit

Permalink
Optimise string trimming on JavaScript
Browse files Browse the repository at this point in the history
  • Loading branch information
richard-viney committed Nov 12, 2024
1 parent 32f29ae commit 5303d38
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 17 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
`trim_right` functions, which have been deprecated.
- The `result.nil_error` function has been deprecated in favour of
`result.replace_error`.
- The performance of `string.trim`, `string.trim_start`, and `string.trim_end`
has been improved on JavaScript.

## v0.41.0 - 2024-10-31

Expand Down
64 changes: 47 additions & 17 deletions src/gleam_stdlib.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -283,31 +283,61 @@ export function split_once(haystack, needle) {
}
}

// The whitespace characters recognised by the regex-based trim patterns,
// packed into one string so it can be dropped into a character class:
//   U+0020 space            U+0009 horizontal tab   U+000A line feed
//   U+000B vertical tab     U+000C form feed        U+000D carriage return
//   U+0085 next line        U+2028 line separator   U+2029 paragraph separator
const unicode_whitespaces =
  "\u0020\u0009\u000A\u000B\u000C\u000D\u0085\u2028\u2029";

// Matches the (possibly empty) run of whitespace anchored at the start of a
// string; intended for use with `String.prototype.replace`.
const left_trim_regex = new RegExp("^([" + unicode_whitespaces + "]*)", "g");

// Matches the (possibly empty) run of whitespace anchored at the end of a
// string; intended for use with `String.prototype.replace`.
const right_trim_regex = new RegExp("([" + unicode_whitespaces + "]*)$", "g");
/**
 * Reports whether `c` is exactly one of the nine whitespace characters that
 * the trim functions strip.
 *
 * The comparison is by strict equality against single-character strings, so
 * any other value (a longer string, an empty string, `undefined`) yields
 * `false`.
 *
 * @param {string} c - The character to classify.
 * @returns {boolean} `true` when `c` is one of the listed characters.
 */
function isUnicodeWhitespace(c) {
  switch (c) {
    case "\u0020": // Space
    case "\u0009": // Horizontal tab
    case "\u000A": // Line feed
    case "\u000B": // Vertical tab
    case "\u000C": // Form feed
    case "\u000D": // Carriage return
    case "\u0085": // Next line
    case "\u2028": // Line separator
    case "\u2029": // Paragraph separator
      return true;
    default:
      return false;
  }
}

/**
 * Removes leading and trailing whitespace from `string`.
 *
 * Both ends are located with the index-scanning helpers and the result is cut
 * out with a single `substring` call.
 *
 * @param {string} string - The string to trim.
 * @returns {string} The part of `string` between its first and last
 *   non-whitespace characters, or `""` if there are none.
 */
export function trim(string) {
  const start_index = find_non_whitespace_char(string);

  // When the string is entirely whitespace the backwards scan returns -1, so
  // the raw end index (0) falls below `start_index` (the string's length).
  // `substring` swaps its arguments when start > end, which would hand back
  // the whole string instead of "", so clamp the end to the start.
  const end_index = Math.max(
    rfind_non_whitespace_char(string) + 1,
    start_index,
  );

  return string.substring(start_index, end_index);
}

/**
 * Removes leading whitespace from `string`.
 *
 * @param {string} string - The string to trim.
 * @returns {string} `string` starting from its first non-whitespace
 *   character, or `""` if every character is whitespace.
 */
export function trim_start(string) {
  // The helper returns `string.length` when no non-whitespace character
  // exists, in which case `substring` yields the empty string.
  return string.substring(find_non_whitespace_char(string));
}

/**
 * Removes trailing whitespace from `string`.
 *
 * @param {string} string - The string to trim.
 * @returns {string} `string` up to and including its last non-whitespace
 *   character, or `""` if every character is whitespace.
 */
export function trim_end(string) {
  // The helper returns -1 when no non-whitespace character exists, making the
  // end index 0 and the result the empty string.
  return string.substring(0, rfind_non_whitespace_char(string) + 1);
}

/**
 * Scans `string` from the front for the first character that is not
 * whitespace according to `isUnicodeWhitespace`.
 *
 * @param {string} string - The string to scan.
 * @returns {number} Index of the first non-whitespace character, or
 *   `string.length` when there is none (including for the empty string).
 */
function find_non_whitespace_char(string) {
  const length = string.length;
  let index = 0;

  while (index < length && isUnicodeWhitespace(string[index])) {
    index += 1;
  }

  return index;
}

/**
 * Scans `string` from the back for the last character that is not whitespace
 * according to `isUnicodeWhitespace`.
 *
 * @param {string} string - The string to scan.
 * @returns {number} Index of the last non-whitespace character, or `-1` when
 *   there is none (including for the empty string).
 */
function rfind_non_whitespace_char(string) {
  let index = string.length - 1;

  while (index >= 0 && isUnicodeWhitespace(string[index])) {
    index -= 1;
  }

  return index;
}

export function bit_array_from_string(string) {
Expand Down
17 changes: 17 additions & 0 deletions test/gleam/string_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,23 @@ pub fn trim_end_test() {
|> should.equal(" hats")
}

// A string made up solely of the supported whitespace characters must trim
// down to the empty string from the start, from the end, and from both sides.
pub fn trim_whole_string_test() {
  let all_whitespace =
    "\u{0020}\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0085}\u{2028}\u{2029}"

  string.trim_start(all_whitespace)
  |> should.equal("")

  string.trim_end(all_whitespace)
  |> should.equal("")

  string.trim(all_whitespace)
  |> should.equal("")
}

// unicode whitespaces
pub fn trim_horizontal_tab_test() {
"hats\u{0009}"
Expand Down

0 comments on commit 5303d38

Please sign in to comment.