diff --git a/packages/lexicon/src/validators/primitives.ts b/packages/lexicon/src/validators/primitives.ts index ece7e06237c..16caa70f733 100644 --- a/packages/lexicon/src/validators/primitives.ts +++ b/packages/lexicon/src/validators/primitives.ts @@ -197,21 +197,35 @@ export function string( } } + // Lazily calculated and reused between checks. + let cachedUtf8Len: number | undefined + let cachedGraphemeLen: number | undefined + // maxLength if (typeof def.maxLength === 'number') { - if (utf8Len(value) > def.maxLength) { - return { - success: false, - error: new ValidationError( - `${path} must not be longer than ${def.maxLength} characters`, - ), + if (value.length * 3 <= def.maxLength) { + // If the JavaScript string length * 3 is within the maximum limit, + // its UTF8 length (which <= .length * 3) will also be within. + // Skip validation. + } else { + const len = cachedUtf8Len ?? (cachedUtf8Len = utf8Len(value)) + if (len > def.maxLength) { + return { + success: false, + error: new ValidationError( + `${path} must not be longer than ${def.maxLength} characters`, + ), + } } } } // minLength if (typeof def.minLength === 'number') { - if (utf8Len(value) < def.minLength) { + if (value.length * 3 < def.minLength) { + // If the JavaScript string length * 3 is below the minimum limit, + // its UTF8 length (which <= .length * 3) will also be below. + // Fail early. return { success: false, error: new ValidationError( @@ -219,29 +233,58 @@ export function string( ), } } - } - - // maxGraphemes - if (typeof def.maxGraphemes === 'number') { - if (graphemeLen(value) > def.maxGraphemes) { + const len = cachedUtf8Len ?? 
(cachedUtf8Len = utf8Len(value)) + if (len < def.minLength) { return { success: false, error: new ValidationError( - `${path} must not be longer than ${def.maxGraphemes} graphemes`, + `${path} must not be shorter than ${def.minLength} characters`, ), } } } + // maxGraphemes + if (typeof def.maxGraphemes === 'number') { + if (value.length <= def.maxGraphemes) { + // If the JavaScript string length is within the maximum limit, + // its grapheme length (which <= .length) will also be within. + // Skip validation. + } else { + const len = cachedGraphemeLen ?? (cachedGraphemeLen = graphemeLen(value)) + if (len > def.maxGraphemes) { + return { + success: false, + error: new ValidationError( + `${path} must not be longer than ${def.maxGraphemes} graphemes`, + ), + } + } + } + } + // minGraphemes if (typeof def.minGraphemes === 'number') { - if (graphemeLen(value) < def.minGraphemes) { + if (value.length < def.minGraphemes) { + // If the JavaScript string length is below the minimum limit, + // its grapheme length (which <= .length) will also be below. + // Fail early. return { success: false, error: new ValidationError( `${path} must not be shorter than ${def.minGraphemes} graphemes`, ), } + } else { + const len = cachedGraphemeLen ?? 
(cachedGraphemeLen = graphemeLen(value)) + if (len < def.minGraphemes) { + return { + success: false, + error: new ValidationError( + `${path} must not be shorter than ${def.minGraphemes} graphemes`, + ), + } + } } } diff --git a/packages/lexicon/tests/general.test.ts b/packages/lexicon/tests/general.test.ts index ca9cb44dc34..1055033bc80 100644 --- a/packages/lexicon/tests/general.test.ts +++ b/packages/lexicon/tests/general.test.ts @@ -567,45 +567,202 @@ describe('Record validation', () => { }) it('Applies string length constraint', () => { + // Shorter than two UTF8 characters + expect(() => + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: '', + }), + ).toThrow('Record/string must not be shorter than 2 characters') + expect(() => + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: 'a', + }), + ).toThrow('Record/string must not be shorter than 2 characters') + + // Two to four UTF8 characters + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: 'ab', + }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: '\u0301', // Combining acute accent (2 bytes) + }) lex.assertValidRecord('com.example.stringLength', { $type: 'com.example.stringLength', - string: '123', + string: 'a\u0301', // 'a' + combining acute accent (1 + 2 bytes = 3 bytes) }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: 'aé', // 'a' (1 byte) + 'é' (2 bytes) = 3 bytes + }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: 'abc', + }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: '一', // CJK character (3 bytes) + }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: '\uD83D', // Unpaired high surrogate 
(3 bytes) + }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: 'abcd', + }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: 'éé', // 'é' + 'é' (2 + 2 bytes = 4 bytes) + }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: 'aaé', // 1 + 1 + 2 = 4 bytes + }) + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: '👋', // 4 bytes + }) + expect(() => lex.assertValidRecord('com.example.stringLength', { $type: 'com.example.stringLength', - string: '1', + string: 'abcde', }), - ).toThrow('Record/string must not be shorter than 2 characters') + ).toThrow('Record/string must not be longer than 4 characters') expect(() => lex.assertValidRecord('com.example.stringLength', { $type: 'com.example.stringLength', - string: '12345', + string: 'a\u0301\u0301', // 1 + (2 * 2) = 5 bytes }), ).toThrow('Record/string must not be longer than 4 characters') expect(() => lex.assertValidRecord('com.example.stringLength', { $type: 'com.example.stringLength', - string: '👨‍👩‍👧‍👧', + string: '\uD83D\uD83D', // Two unpaired high surrogates (3 * 2 = 6 bytes) + }), + ).toThrow('Record/string must not be longer than 4 characters') + expect(() => + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: 'ééé', // 2 + 2 + 2 bytes = 6 bytes + }), + ).toThrow('Record/string must not be longer than 4 characters') + expect(() => + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: '👋a', // 4 + 1 bytes = 5 bytes + }), + ).toThrow('Record/string must not be longer than 4 characters') + expect(() => + lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: '👨👨', // 4 + 4 = 8 bytes + }), + ).toThrow('Record/string must not be longer than 4 characters') + expect(() => + 
lex.assertValidRecord('com.example.stringLength', { + $type: 'com.example.stringLength', + string: '👨‍👩‍👧‍👧', // 4 emojis × 4 bytes + 3 ZWJs × 3 bytes = 25 bytes }), ).toThrow('Record/string must not be longer than 4 characters') }) it('Applies grapheme string length constraint', () => { + // Shorter than two graphemes + expect(() => + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: '', + }), + ).toThrow('Record/string must not be shorter than 2 graphemes') + expect(() => + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: '\u0301\u0301\u0301', // Three combining acute accents + }), + ).toThrow('Record/string must not be shorter than 2 graphemes') + expect(() => + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: 'a', + }), + ).toThrow('Record/string must not be shorter than 2 graphemes') + expect(() => + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: 'a\u0301\u0301\u0301\u0301', // 'á́́́' ('a' with four combining acute accents) + }), + ).toThrow('Record/string must not be shorter than 2 graphemes') + expect(() => + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: '5\uFE0F', // '5️' with emoji presentation + }), + ).toThrow('Record/string must not be shorter than 2 graphemes') + expect(() => + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: '👨‍👩‍👧‍👧', + }), + ).toThrow('Record/string must not be shorter than 2 graphemes') + + // Two to four graphemes + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: 'ab', + }) + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 
'com.example.stringLengthGrapheme', + string: 'a\u0301b', // 'áb' with combining accent + }) + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: 'a\u0301b\u0301', // 'áb́' + }) + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: '😀😀', + }) lex.assertValidRecord('com.example.stringLengthGrapheme', { $type: 'com.example.stringLengthGrapheme', string: '12👨‍👩‍👧‍👧', }) + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: 'abcd', + }) + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: 'a\u0301b\u0301c\u0301d\u0301', // 'áb́ćd́' + }) + + // Longer than four graphemes expect(() => lex.assertValidRecord('com.example.stringLengthGrapheme', { $type: 'com.example.stringLengthGrapheme', - string: '👨‍👩‍👧‍👧', + string: 'abcde', }), - ).toThrow('Record/string must not be shorter than 2 graphemes') + ).toThrow('Record/string must not be longer than 4 graphemes') + expect(() => + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: 'a\u0301b\u0301c\u0301d\u0301e\u0301', // 'áb́ćd́é' + }), + ).toThrow('Record/string must not be longer than 4 graphemes') + expect(() => + lex.assertValidRecord('com.example.stringLengthGrapheme', { + $type: 'com.example.stringLengthGrapheme', + string: '😀😀😀😀😀', + }), + ).toThrow('Record/string must not be longer than 4 graphemes') expect(() => lex.assertValidRecord('com.example.stringLengthGrapheme', { $type: 'com.example.stringLengthGrapheme', - string: '12345', + string: 'ab😀de', }), ).toThrow('Record/string must not be longer than 4 graphemes') })