diff --git a/ast/api_amd64.go b/ast/api_amd64.go index 4631746ed..cd2ae21a7 100644 --- a/ast/api_amd64.go +++ b/ast/api_amd64.go @@ -171,3 +171,25 @@ func DecodeString(src string, pos int) (ret int, v string) { return -int(_ERR_UNSUPPORT_TYPE), "" } } + +// ValidSyntax reports whether json has valid JSON syntax, +// but does not validate its UTF-8 encoding. +func ValidSyntax(json string) bool { + fsm := types.NewStateMachine() + p := 0 + ret := native.ValidateOne(&json, &p, fsm, 0) + types.FreeStateMachine(fsm) + + if ret < 0 { + return false + } + + /* check for trailing spaces */ + for ;p < len(json); p++ { + if !isSpace(json[p]) { + return false + } + } + + return true +} diff --git a/ast/api_compat.go b/ast/api_compat.go index 0ee3d4bca..45ee732d4 100644 --- a/ast/api_compat.go +++ b/ast/api_compat.go @@ -136,3 +136,20 @@ func DecodeString(src string, pos int) (ret int, v string) { runtime.KeepAlive(src) return ret, rt.Mem2Str(vv) } + +// ValidSyntax reports whether json has valid JSON syntax, +// but does not validate its UTF-8 encoding. +func ValidSyntax(json string) bool { + p, _ := skipValue(json, 0) + if p < 0 { + return false + } + /* check for trailing spaces */ + for ;p < len(json); p++ { + if !isSpace(json[p]) { + return false + } + } + + return true +} diff --git a/ast/parser.go b/ast/parser.go index 3debf60cc..0d41d47ee 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -781,6 +781,8 @@ func (self Parser) ExportError(err types.ParsingError) error { }.Description()) } +// SkipFast skips a JSON value using fast-skip algorithms, +// without strictly validating JSON syntax or UTF-8 encoding. 
func SkipFast(src string, i int) (int, int, error) { p := NewParserObj(src) p.p = i diff --git a/internal/encoder/encoder.go b/internal/encoder/encoder.go index 0a46455eb..d285c2991 100644 --- a/internal/encoder/encoder.go +++ b/internal/encoder/encoder.go @@ -337,7 +337,7 @@ func Valid(data []byte) (ok bool, start int) { s := rt.Mem2Str(data) p := 0 m := types.NewStateMachine() - ret := native.ValidateOne(&s, &p, m) + ret := native.ValidateOne(&s, &p, m, types.F_VALIDATE_STRING) types.FreeStateMachine(m) if ret < 0 { diff --git a/internal/native/avx/native_amd64.go b/internal/native/avx/native_amd64.go index 627ac177c..b48e78d6a 100644 --- a/internal/native/avx/native_amd64.go +++ b/internal/native/avx/native_amd64.go @@ -64,7 +64,7 @@ var ( __skip_number func(s unsafe.Pointer, p unsafe.Pointer) (ret int) - __validate_one func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer) (ret int) + __validate_one func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer, flags uint64) (ret int) __get_by_path func(s unsafe.Pointer, p unsafe.Pointer, path unsafe.Pointer, m unsafe.Pointer) (ret int) @@ -166,8 +166,8 @@ func skip_number(s *string, p *int) (ret int) { } //go:nosplit -func validate_one(s *string, p *int, m *types.StateMachine) (ret int) { - return __validate_one(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m))) +func validate_one(s *string, p *int, m *types.StateMachine, flags uint64) (ret int) { + return __validate_one(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m)), flags) } //go:nosplit diff --git a/internal/native/avx/native_subr_amd64.go b/internal/native/avx/native_subr_amd64.go index 50ae487b4..c092cde39 100644 --- a/internal/native/avx/native_subr_amd64.go +++ b/internal/native/avx/native_subr_amd64.go @@ -387,8 +387,8 @@ var ( } _pcsp__validate_one = [][2]uint32{ {1, 0}, - {35, 8}, - {41, 0}, + {30, 8}, + {36, 0}, } _pcsp__validate_utf8 = [][2]uint32{ {1, 0}, 
diff --git a/internal/native/avx/native_text_amd64.go b/internal/native/avx/native_text_amd64.go index f3d4a4f7a..256f4062d 100644 --- a/internal/native/avx/native_text_amd64.go +++ b/internal/native/avx/native_text_amd64.go @@ -5853,14 +5853,13 @@ var Text__native_entry__ = []byte{ 0x48, 0x89, 0xd0, //0x000055f4 movq %rdx, %rax 0x48, 0x89, 0xf2, //0x000055f7 movq %rsi, %rdx 0x48, 0x89, 0xfe, //0x000055fa movq %rdi, %rsi - 0xb9, 0x01, 0x00, 0x00, 0x00, //0x000055fd movl $1, %ecx - 0xc4, 0xe1, 0xf9, 0x6e, 0xc1, //0x00005602 vmovq %rcx, %xmm0 + 0xbf, 0x01, 0x00, 0x00, 0x00, //0x000055fd movl $1, %edi + 0xc4, 0xe1, 0xf9, 0x6e, 0xc7, //0x00005602 vmovq %rdi, %xmm0 0xc5, 0xfa, 0x7f, 0x00, //0x00005607 vmovdqu %xmm0, (%rax) - 0xb9, 0x20, 0x00, 0x00, 0x00, //0x0000560b movl $32, %ecx - 0x48, 0x89, 0xc7, //0x00005610 movq %rax, %rdi - 0x5d, //0x00005613 popq %rbp - 0xe9, 0xe7, 0xef, 0xff, 0xff, //0x00005614 jmp _fsm_exec - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //0x00005619 .p2align 4, 0x00 + 0x48, 0x89, 0xc7, //0x0000560b movq %rax, %rdi + 0x5d, //0x0000560e popq %rbp + 0xe9, 0xec, 0xef, 0xff, 0xff, //0x0000560f jmp _fsm_exec + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //0x00005614 .p2align 4, 0x00 //0x00005620 LCPI27_0 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, //0x00005620 QUAD $0x2c2c2c2c2c2c2c2c; QUAD $0x2c2c2c2c2c2c2c2c // .space 16, ',,,,,,,,,,,,,,,,' //0x00005630 LCPI27_1 diff --git a/internal/native/avx2/native_amd64.go b/internal/native/avx2/native_amd64.go index 1a4ea0ae3..3229b885b 100644 --- a/internal/native/avx2/native_amd64.go +++ b/internal/native/avx2/native_amd64.go @@ -64,7 +64,7 @@ var ( __skip_number func(s unsafe.Pointer, p unsafe.Pointer) (ret int) - __validate_one func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer) (ret int) + __validate_one func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer, flags uint64) (ret int) __get_by_path func(s 
unsafe.Pointer, p unsafe.Pointer, path unsafe.Pointer, m unsafe.Pointer) (ret int) @@ -166,8 +166,8 @@ func skip_number(s *string, p *int) (ret int) { } //go:nosplit -func validate_one(s *string, p *int, m *types.StateMachine) (ret int) { - return __validate_one(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m))) +func validate_one(s *string, p *int, m *types.StateMachine, flags uint64) (ret int) { + return __validate_one(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m)), flags) } //go:nosplit diff --git a/internal/native/avx2/native_subr_amd64.go b/internal/native/avx2/native_subr_amd64.go index 432aea923..679118fc3 100644 --- a/internal/native/avx2/native_subr_amd64.go +++ b/internal/native/avx2/native_subr_amd64.go @@ -395,8 +395,8 @@ var ( } _pcsp__validate_one = [][2]uint32{ {1, 0}, - {35, 8}, - {41, 0}, + {30, 8}, + {36, 0}, } _pcsp__validate_utf8 = [][2]uint32{ {1, 0}, diff --git a/internal/native/avx2/native_text_amd64.go b/internal/native/avx2/native_text_amd64.go index 628e492b2..43a6f39bd 100644 --- a/internal/native/avx2/native_text_amd64.go +++ b/internal/native/avx2/native_text_amd64.go @@ -6702,14 +6702,13 @@ var Text__native_entry__ = []byte{ 0x48, 0x89, 0xd0, //0x00006404 movq %rdx, %rax 0x48, 0x89, 0xf2, //0x00006407 movq %rsi, %rdx 0x48, 0x89, 0xfe, //0x0000640a movq %rdi, %rsi - 0xb9, 0x01, 0x00, 0x00, 0x00, //0x0000640d movl $1, %ecx - 0xc4, 0xe1, 0xf9, 0x6e, 0xc1, //0x00006412 vmovq %rcx, %xmm0 + 0xbf, 0x01, 0x00, 0x00, 0x00, //0x0000640d movl $1, %edi + 0xc4, 0xe1, 0xf9, 0x6e, 0xc7, //0x00006412 vmovq %rdi, %xmm0 0xc5, 0xfa, 0x7f, 0x00, //0x00006417 vmovdqu %xmm0, (%rax) - 0xb9, 0x20, 0x00, 0x00, 0x00, //0x0000641b movl $32, %ecx - 0x48, 0x89, 0xc7, //0x00006420 movq %rax, %rdi - 0x5d, //0x00006423 popq %rbp - 0xe9, 0x27, 0xee, 0xff, 0xff, //0x00006424 jmp _fsm_exec - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //0x00006429 .p2align 5, 0x00 + 0x48, 0x89, 0xc7, //0x0000641b movq %rax, %rdi + 0x5d, //0x0000641e popq %rbp + 0xe9, 0x2c, 0xee, 0xff, 0xff, //0x0000641f jmp _fsm_exec + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //0x00006424 .p2align 5, 0x00 //0x00006440 LCPI28_0 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, //0x00006440 QUAD $0x2c2c2c2c2c2c2c2c; QUAD $0x2c2c2c2c2c2c2c2c // .space 16, ',,,,,,,,,,,,,,,,' 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, //0x00006450 QUAD $0x2c2c2c2c2c2c2c2c; QUAD $0x2c2c2c2c2c2c2c2c // .space 16, ',,,,,,,,,,,,,,,,' diff --git a/internal/native/dispatch_amd64.go b/internal/native/dispatch_amd64.go index 474badebb..84bd61fe5 100644 --- a/internal/native/dispatch_amd64.go +++ b/internal/native/dispatch_amd64.go @@ -85,7 +85,7 @@ var ( __GetByPath func(s unsafe.Pointer, p unsafe.Pointer, path unsafe.Pointer, m unsafe.Pointer) int - __ValidateOne func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer) int + __ValidateOne func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer, flags uint64) int __I64toa func(out unsafe.Pointer, val int64) (ret int) @@ -134,8 +134,8 @@ func GetByPath(s *string, p *int, path *[]interface{}, m *types.StateMachine) in } //go:nosplit -func ValidateOne(s *string, p *int, m *types.StateMachine) int { - return __ValidateOne(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m))) +func ValidateOne(s *string, p *int, m *types.StateMachine, flags uint64) int { + return __ValidateOne(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m)), flags) } //go:nosplit diff --git a/internal/native/native_amd64.tmpl b/internal/native/native_amd64.tmpl index 
5839da99c..bfec62aa9 100644 --- a/internal/native/native_amd64.tmpl +++ b/internal/native/native_amd64.tmpl @@ -62,7 +62,7 @@ var ( __skip_number func(s unsafe.Pointer, p unsafe.Pointer) (ret int) - __validate_one func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer) (ret int) + __validate_one func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer, flags uint64) (ret int) __get_by_path func(s unsafe.Pointer, p unsafe.Pointer, path unsafe.Pointer, m unsafe.Pointer) (ret int) @@ -164,8 +164,8 @@ func skip_number(s *string, p *int) (ret int) { } //go:nosplit -func validate_one(s *string, p *int, m *types.StateMachine) (ret int) { - return __validate_one(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m))) +func validate_one(s *string, p *int, m *types.StateMachine, flags uint64) (ret int) { + return __validate_one(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m)), flags) } //go:nosplit diff --git a/internal/native/sse/native_amd64.go b/internal/native/sse/native_amd64.go index c2ab79531..74903e9e1 100644 --- a/internal/native/sse/native_amd64.go +++ b/internal/native/sse/native_amd64.go @@ -64,7 +64,7 @@ var ( __skip_number func(s unsafe.Pointer, p unsafe.Pointer) (ret int) - __validate_one func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer) (ret int) + __validate_one func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer, flags uint64) (ret int) __get_by_path func(s unsafe.Pointer, p unsafe.Pointer, path unsafe.Pointer, m unsafe.Pointer) (ret int) @@ -166,8 +166,8 @@ func skip_number(s *string, p *int) (ret int) { } //go:nosplit -func validate_one(s *string, p *int, m *types.StateMachine) (ret int) { - return __validate_one(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m))) +func validate_one(s *string, p *int, m *types.StateMachine, flags uint64) (ret int) { + return __validate_one(rt.NoEscape(unsafe.Pointer(s)), 
rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m)), flags) } //go:nosplit diff --git a/internal/native/sse/native_subr_amd64.go b/internal/native/sse/native_subr_amd64.go index 8c0c2d04e..2c7245929 100644 --- a/internal/native/sse/native_subr_amd64.go +++ b/internal/native/sse/native_subr_amd64.go @@ -388,8 +388,8 @@ var ( } _pcsp__validate_one = [][2]uint32{ {1, 0}, - {35, 8}, - {41, 0}, + {30, 8}, + {36, 0}, } _pcsp__validate_utf8 = [][2]uint32{ {1, 0}, diff --git a/internal/native/sse/native_text_amd64.go b/internal/native/sse/native_text_amd64.go index 7a155f945..0a55d63b9 100644 --- a/internal/native/sse/native_text_amd64.go +++ b/internal/native/sse/native_text_amd64.go @@ -5898,14 +5898,13 @@ var Text__native_entry__ = []byte{ 0x48, 0x89, 0xd0, //0x00005664 movq %rdx, %rax 0x48, 0x89, 0xf2, //0x00005667 movq %rsi, %rdx 0x48, 0x89, 0xfe, //0x0000566a movq %rdi, %rsi - 0xb9, 0x01, 0x00, 0x00, 0x00, //0x0000566d movl $1, %ecx - 0x66, 0x48, 0x0f, 0x6e, 0xc1, //0x00005672 movq %rcx, %xmm0 + 0xbf, 0x01, 0x00, 0x00, 0x00, //0x0000566d movl $1, %edi + 0x66, 0x48, 0x0f, 0x6e, 0xc7, //0x00005672 movq %rdi, %xmm0 0xf3, 0x0f, 0x7f, 0x00, //0x00005677 movdqu %xmm0, (%rax) - 0xb9, 0x20, 0x00, 0x00, 0x00, //0x0000567b movl $32, %ecx - 0x48, 0x89, 0xc7, //0x00005680 movq %rax, %rdi - 0x5d, //0x00005683 popq %rbp - 0xe9, 0xd7, 0xef, 0xff, 0xff, //0x00005684 jmp _fsm_exec - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //0x00005689 .p2align 4, 0x00 + 0x48, 0x89, 0xc7, //0x0000567b movq %rax, %rdi + 0x5d, //0x0000567e popq %rbp + 0xe9, 0xdc, 0xef, 0xff, 0xff, //0x0000567f jmp _fsm_exec + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //0x00005684 .p2align 4, 0x00 //0x00005690 LCPI27_0 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, //0x00005690 QUAD $0x2c2c2c2c2c2c2c2c; QUAD $0x2c2c2c2c2c2c2c2c // .space 16, ',,,,,,,,,,,,,,,,' //0x000056a0 LCPI27_1 diff --git a/native/native.h b/native/native.h 
index 93d55dc4d..fd655deff 100644 --- a/native/native.h +++ b/native/native.h @@ -145,7 +145,7 @@ bool atof_eisel_lemire64(uint64_t mant, int exp10, int sgn, double *val); double atof_native(const char *sp, ssize_t nb, char *dbuf, ssize_t cap); long validate_string(const GoString *src, long *p); -long validate_one(const GoString *src, long *p, StateMachine *m); +long validate_one(const GoString *src, long *p, StateMachine *m, uint64_t flags); long validate_utf8(const GoString *src, long *p, StateMachine *m); long validate_utf8_fast(const GoString *src); diff --git a/native/scanning.c b/native/scanning.c index ae485d4fc..9ae718bc7 100644 --- a/native/scanning.c +++ b/native/scanning.c @@ -1445,9 +1445,9 @@ long skip_one(const GoString *src, long *p, StateMachine *m, uint64_t flags) { return fsm_exec(m, src, p, flags); } -long validate_one(const GoString *src, long *p, StateMachine *m) { +long validate_one(const GoString *src, long *p, StateMachine *m, uint64_t flags) { fsm_init(m, FSM_VAL); - return fsm_exec(m, src, p, MASK_VALIDATE_STRING); + return fsm_exec(m, src, p, flags); } /* Faster skip api for sonic.ast */