diff --git a/internal/encoder/alg/spec.go b/internal/encoder/alg/spec.go index 88317496c..7f0e81002 100644 --- a/internal/encoder/alg/spec.go +++ b/internal/encoder/alg/spec.go @@ -1,3 +1,4 @@ +//go:build (amd64 && go1.16 && !go1.22) || (arm64 && go1.20 && !go1.22) // +build amd64,go1.16,!go1.22 arm64,go1.20,!go1.22 /** @@ -20,6 +21,7 @@ package alg import ( "runtime" + "strconv" "unsafe" "github.com/bytedance/sonic/internal/native" @@ -150,6 +152,9 @@ func HtmlEscape(dst []byte, src []byte) []byte { } func F64toa(buf []byte, v float64) ([]byte) { + if v == 0 { + return append(buf, '0') + } buf = rt.GuardSlice2(buf, 64) ret := native.F64toa((*byte)(rt.IndexByte(buf, len(buf))), v) if ret > 0 { @@ -160,6 +165,9 @@ func F64toa(buf []byte, v float64) ([]byte) { } func F32toa(buf []byte, v float32) ([]byte) { + if v == 0 { + return append(buf, '0') + } buf = rt.GuardSlice2(buf, 64) ret := native.F32toa((*byte)(rt.IndexByte(buf, len(buf))), v) if ret > 0 { @@ -169,22 +177,39 @@ func F32toa(buf []byte, v float32) ([]byte) { } } +// func I64toa(buf []byte, v int64) ([]byte) { +// if -10 < v && v < 10 { +// buf = rt.GuardSlice2(buf, 32) +// ret := native.I64toa((*byte)(rt.IndexByte(buf, len(buf))), v) +// if ret > 0 { +// return buf[:len(buf)+ret] +// } else { +// return buf +// } +// } else { +// return strconv.AppendInt(buf, v, 10) +// } +// } + +// func U64toa(buf []byte, v uint64) ([]byte) { +// if v < 10 { +// buf = rt.GuardSlice2(buf, 32) +// ret := native.U64toa((*byte)(rt.IndexByte(buf, len(buf))), v) +// if ret > 0 { +// return buf[:len(buf)+ret] +// } else { +// return buf +// } +// } else { +// return strconv.AppendInt(buf, int64(v), 10) +// } +// } + func I64toa(buf []byte, v int64) ([]byte) { - buf = rt.GuardSlice2(buf, 32) - ret := native.I64toa((*byte)(rt.IndexByte(buf, len(buf))), v) - if ret > 0 { - return buf[:len(buf)+ret] - } else { - return buf - } + return strconv.AppendInt(buf, int64(v), 10) } func U64toa(buf []byte, v uint64) ([]byte) { - buf = rt.GuardSlice2(buf, 32) - ret := native.U64toa((*byte)(rt.IndexByte(buf, len(buf))), v) - if ret > 0 { - return buf[:len(buf)+ret] - } else { - return buf - } + return strconv.AppendUint(buf, v, 10) } + diff --git a/internal/encoder/compiler.go b/internal/encoder/compiler.go index 4ad33edd3..edd40d8bc 100644 --- a/internal/encoder/compiler.go +++ b/internal/encoder/compiler.go @@ -244,6 +244,8 @@ func (self *Compiler) compileMap(p *ir.Program, sp int, vt reflect.Type) { func (self *Compiler) compileMapBody(p *ir.Program, sp int, vt reflect.Type) { p.Tag(sp + 1) p.Int(ir.OP_byte, '{') + e := p.PC() + p.Add(ir.OP_is_zero_map) p.Add(ir.OP_save) p.Rtt(ir.OP_map_iter, vt) p.Add(ir.OP_save) @@ -271,6 +273,7 @@ func (self *Compiler) compileMapBody(p *ir.Program, sp int, vt reflect.Type) { p.Pin(j) p.Add(ir.OP_map_stop) p.Add(ir.OP_drop_2) + p.Pin(e) p.Int(ir.OP_byte, '}') } @@ -355,6 +358,8 @@ func (self *Compiler) compileSliceBody(p *ir.Program, sp int, vt reflect.Type) { func (self *Compiler) compileSliceArray(p *ir.Program, sp int, vt reflect.Type) { p.Tag(sp) p.Int(ir.OP_byte, '[') + e := p.PC() + p.Add(ir.OP_is_nil) p.Add(ir.OP_save) p.Add(ir.OP_slice_len) i := p.PC() @@ -368,6 +373,7 @@ func (self *Compiler) compileSliceArray(p *ir.Program, sp int, vt reflect.Type) p.Pin(i) p.Pin(j) p.Add(ir.OP_drop) + p.Pin(e) p.Int(ir.OP_byte, ']') } diff --git a/internal/encoder/vm/vm.go b/internal/encoder/vm/vm.go index 731ae70ab..2311214a1 100644 --- a/internal/encoder/vm/vm.go +++ b/internal/encoder/vm/vm.go @@ -65,9 +65,9 @@ func Execute(b *[]byte, p unsafe.Pointer, s *vars.Stack, flags uint64, prog *ir. pc++ op := ins.Op() - if vars.DebugSyncGC { - print_instr(buf, pc, op, ins) - } + // if vars.DebugSyncGC { + // print_instr(buf, pc, op, ins) + // } switch op { case ir.OP_goto: @@ -124,15 +124,15 @@ func Execute(b *[]byte, p unsafe.Pointer, s *vars.Stack, flags uint64, prog *ir. continue } case ir.OP_null: - buf = append(buf, "null"...) + buf = append(buf, 'n', 'u', 'l', 'l') case ir.OP_str: v := *(*string)(p) buf = alg.Quote(buf, v, false) case ir.OP_bool: if *(*bool)(p) { - buf = append(buf, "true"...) + buf = append(buf, 't', 'r', 'u', 'e') } else { - buf = append(buf, "false"...) + buf = append(buf, 'f', 'a', 'l', 's', 'e') } case ir.OP_i8: v := *(*int8)(p) @@ -226,6 +226,28 @@ func Execute(b *[]byte, p unsafe.Pointer, s *vars.Stack, flags uint64, prog *ir. continue } p = it.It.K + case ir.OP_marshal_text: + vt, itab := ins.Vtab() + var it rt.GoIface + switch vt.Kind() { + case reflect.Interface : + if is_nil(p) { + buf = append(buf, 'n', 'u', 'l', 'l') + continue + } + it = rt.AssertI2I(_T_encoding_TextMarshaler, *(*rt.GoIface)(p)) + case reflect.Ptr, reflect.Map : it = convT2I(p, true, itab) + default : it = convT2I(p, !vt.Indirect(), itab) + } + if err := alg.EncodeTextMarshaler(&buf, *(*encoding.TextMarshaler)(unsafe.Pointer(&it)), (flags)); err != nil { + return err + } + case ir.OP_marshal_text_p: + _, itab := ins.Vtab() + it := convT2I(p, false, itab) + if err := alg.EncodeTextMarshaler(&buf, *(*encoding.TextMarshaler)(unsafe.Pointer(&it)), (flags)); err != nil { + return err + } case ir.OP_map_write_key: if has_opts(flags, alg.BitSortMapKeys) { v := *(*string)(p) @@ -280,38 +302,36 @@ func Execute(b *[]byte, p unsafe.Pointer, s *vars.Stack, flags uint64, prog *ir. } case ir.OP_empty_arr: if has_opts(flags, alg.BitNoNullSliceOrMap) { - buf = append(buf, "[]"...) + buf = append(buf, '[', ']') } else { - buf = append(buf, "null"...) + buf = append(buf, 'n', 'u', 'l', 'l') } case ir.OP_empty_obj: if has_opts(flags, alg.BitNoNullSliceOrMap) { - buf = append(buf, "{}"...) + buf = append(buf, '{', '}') } else { - buf = append(buf, "null"...) + buf = append(buf, 'n', 'u', 'l', 'l') } case ir.OP_marshal: vt, itab := ins.Vtab() - var err error - if buf, err = call_json_marshaler(buf, vt, itab, p, flags, false); err != nil { - return err - } - case ir.OP_marshal_p: - vt, itab := ins.Vtab() - var err error - if buf, err = call_json_marshaler(buf, vt, itab, p, flags, true); err != nil { - return err + var it rt.GoIface + switch vt.Kind() { + case reflect.Interface : + if is_nil(p) { + buf = append(buf, 'n', 'u', 'l', 'l') + continue + } + it = rt.AssertI2I(_T_json_Marshaler, *(*rt.GoIface)(p)) + case reflect.Ptr, reflect.Map : it = convT2I(p, true, itab) + default : it = convT2I(p, !vt.Indirect(), itab) } - case ir.OP_marshal_text: - vt, itab := ins.Vtab() - var err error - if buf, err = call_text_marshaler(buf, vt, itab, p, flags, false); err != nil { + if err := alg.EncodeJsonMarshaler(&buf, *(*json.Marshaler)(unsafe.Pointer(&it)), (flags)); err != nil { return err } - case ir.OP_marshal_text_p: - vt, itab := ins.Vtab() - var err error - if buf, err = call_text_marshaler(buf, vt, itab, p, flags, true); err != nil { + case ir.OP_marshal_p: + _, itab := ins.Vtab() + it := convT2I(p, false, itab) + if err := alg.EncodeJsonMarshaler(&buf, *(*json.Marshaler)(unsafe.Pointer(&it)), (flags)); err != nil { return err } default: @@ -348,49 +368,3 @@ func convT2I(ptr unsafe.Pointer, deref bool, itab *rt.GoItab) (rt.GoIface) { Value: ptr, } } - -func call_text_marshaler(buf []byte, vt *rt.GoType, itab *rt.GoItab, p unsafe.Pointer, flags uint64, pointer bool) ([]byte, error) { - var it rt.GoIface - if !pointer { - switch vt.Kind() { - case reflect.Interface : - if is_nil(p) { - buf = append(buf, "null"...) - return buf, nil - } - it = rt.AssertI2I(_T_encoding_TextMarshaler, *(*rt.GoIface)(p)) - case reflect.Ptr, reflect.Map : it = convT2I(p, true, itab) - default : it = convT2I(p, !vt.Indirect(), itab) - } - - } else { - it = convT2I(p, false, itab) - } - if err := alg.EncodeTextMarshaler(&buf, *(*encoding.TextMarshaler)(unsafe.Pointer(&it)), (flags)); err != nil { - return buf, err - } - return buf, nil -} - -func call_json_marshaler(buf []byte, vt *rt.GoType, itab *rt.GoItab, p unsafe.Pointer, flags uint64, pointer bool) ([]byte, error) { - var it rt.GoIface - if !pointer { - switch vt.Kind() { - case reflect.Interface : - if is_nil(p) { - buf = append(buf, "null"...) - return buf, nil - } - it = rt.AssertI2I(_T_json_Marshaler, *(*rt.GoIface)(p)) - case reflect.Ptr, reflect.Map : it = convT2I(p, true, itab) - default : it = convT2I(p, !vt.Indirect(), itab) - } - - } else { - it = convT2I(p, false, itab) - } - if err := alg.EncodeJsonMarshaler(&buf, *(*json.Marshaler)(unsafe.Pointer(&it)), (flags)); err != nil { - return buf, err - } - return buf, nil -} diff --git a/internal/native/neon/f32toa_arm64.s b/internal/native/neon/f32toa_arm64.s index cc6abecd7..de4aa9da7 100644 --- a/internal/native/neon/f32toa_arm64.s +++ b/internal/native/neon/f32toa_arm64.s @@ -7,8 +7,9 @@ TEXT ·__f32toa_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _f32toa + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 2, 0x00 _f32toa: WORD $0xa9be7bfd // stp fp, lr, [sp, #-32]! @@ -905,7 +906,7 @@ _Digits: WORD $0x37393639 // .ascii 4, '96979899' WORD $0x39393839 // .ascii 4, '9899' WORD $0x00000000 // .p2align 3, 0x00 -_LB_e80ea3d0: // _pow10_ceil_sig_f32.g +_LB_8f8a73de: // _pow10_ceil_sig_f32.g WORD $0x4b43fcf5; WORD $0x81ceb32c // .quad -9093133594791772939 WORD $0x5e14fc32; WORD $0xa2425ff7 // .quad -6754730975062328270 WORD $0x359a3b3f; WORD $0xcad2f7f5 // .quad -3831727700400522433 @@ -997,7 +998,9 @@ _f32toa: MOVD out+0(FP), R0 FMOVD val+8(FP), F0 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__f32toa_entry__(SB) // _f32toa + MOVD ·_subr__f32toa(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+16(FP) RET diff --git a/internal/native/neon/f32toa_subr_arm64.go b/internal/native/neon/f32toa_subr_arm64.go index df71102ac..1e624c14b 100644 --- a/internal/native/neon/f32toa_subr_arm64.go +++ b/internal/native/neon/f32toa_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __f32toa_entry__() uintptr var ( - _subr__f32toa uintptr = 0 + _subr__f32toa uintptr = __f32toa_entry__() + 0 ) const ( diff --git a/internal/native/neon/f64toa_arm64.s b/internal/native/neon/f64toa_arm64.s index 92ce3d329..6a99ed78a 100644 --- a/internal/native/neon/f64toa_arm64.s +++ b/internal/native/neon/f64toa_arm64.s @@ -7,8 +7,9 @@ TEXT ·__f64toa_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _f64toa + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 2, 0x00 _f64toa: WORD $0xa9be7bfd // stp fp, lr, [sp, #-32]! @@ -1221,7 +1222,7 @@ _Digits: WORD $0x37393639 // .ascii 4, '96979899' WORD $0x39393839 // .ascii 4, '9899' // .p2align 3, 0x00 -_LB_40c6cc40: // _pow10_ceil_sig.g +_LB_1c9f0ea5: // _pow10_ceil_sig.g WORD $0xbebcdc4f; WORD $0xff77b1fc // .quad -38366372719436721 WORD $0x13bb0f7b; WORD $0x25e8e89c // .quad 2731688931043774331 WORD $0xf73609b1; WORD $0x9faacf3d // .quad -6941508010590729807 @@ -2470,7 +2471,9 @@ _f64toa: MOVD out+0(FP), R0 FMOVD val+8(FP), F0 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__f64toa_entry__(SB) // _f64toa + MOVD ·_subr__f64toa(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+16(FP) RET diff --git a/internal/native/neon/f64toa_subr_arm64.go b/internal/native/neon/f64toa_subr_arm64.go index f0c980647..e09d800c9 100644 --- a/internal/native/neon/f64toa_subr_arm64.go +++ b/internal/native/neon/f64toa_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __f64toa_entry__() uintptr var ( - _subr__f64toa uintptr = 0 + _subr__f64toa uintptr = __f64toa_entry__() + 0 ) const ( diff --git a/internal/native/neon/fastfloat_arm64_test.go b/internal/native/neon/fastfloat_arm64_test.go index f3c35f4fd..dc00ec967 100644 --- a/internal/native/neon/fastfloat_arm64_test.go +++ b/internal/native/neon/fastfloat_arm64_test.go @@ -84,10 +84,14 @@ func BenchmarkParseFloat64(b *testing.B) { float float64 }{ {"Zero", 0}, - {"Decimal", 33909}, - {"Float", 339.7784}, - {"Exp", -5.09e75}, - {"NegExp", -5.11e-95}, + {"Decimal1", 39}, + {"Decimal2", 33909}, + {"Float1", 3.77}, + {"Float2", 339.778442}, + {"Exp1", 5.9e5}, + {"Exp2", 521.091e25}, + {"NegExp1", -5.1e-5}, + {"NegExp2", -52.132e-35}, {"LongExp", 1.234567890123456e-78}, {"Big", 123456789123456789123456789}, @@ -116,11 +120,14 @@ func BenchmarkParseFloat32(b *testing.B) { float float32 }{ {"Zero", 0}, - {"Integer", 33909}, - {"ExactFraction", 3.375}, - {"Point", 339.7784}, - {"Exp", -5.09e25}, - {"NegExp", -5.11e-25}, + {"Decimal1", 39}, + {"Decimal2", 33909}, + {"Float1", 3.77}, + {"Float2", 339.778442}, + {"Exp1", 5.9e5}, + {"Exp2", 521.091e19}, + {"NegExp1", -5.1e-5}, + {"NegExp2", -52.132e-19}, {"Shortest", 1.234567e-8}, } for _, c := range f32toaBenches { diff --git a/internal/native/neon/get_by_path_arm64.s b/internal/native/neon/get_by_path_arm64.s index 1a1769f12..b7f1406af 100644 --- a/internal/native/neon/get_by_path_arm64.s +++ b/internal/native/neon/get_by_path_arm64.s @@ -7,8 +7,9 @@ TEXT ·__get_by_path_entry__(SB), NOSPLIT, $192 NO_LOCAL_POINTERS - WORD $0x910343ff // add sp, sp, #208 - JMP _get_by_path + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x08040201 @@ -4567,7 +4568,9 @@ _get_by_path: MOVD path+16(FP), R2 MOVD m+24(FP), R3 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__get_by_path_entry__(SB) // _get_by_path + MOVD ·_subr__get_by_path(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+32(FP) RET diff --git a/internal/native/neon/get_by_path_subr_arm64.go b/internal/native/neon/get_by_path_subr_arm64.go index c2aca952e..2d0172721 100644 --- a/internal/native/neon/get_by_path_subr_arm64.go +++ b/internal/native/neon/get_by_path_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __get_by_path_entry__() uintptr var ( - _subr__get_by_path uintptr = 48 + _subr__get_by_path uintptr = __get_by_path_entry__() + 48 ) const ( diff --git a/internal/native/neon/html_escape_arm64.s b/internal/native/neon/html_escape_arm64.s index 7f44ff66e..238562637 100644 --- a/internal/native/neon/html_escape_arm64.s +++ b/internal/native/neon/html_escape_arm64.s @@ -7,8 +7,9 @@ TEXT ·__html_escape_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _html_escape + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x08040201 @@ -1390,7 +1391,9 @@ _html_escape: MOVD dp+16(FP), R2 MOVD dn+24(FP), R3 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__html_escape_entry__(SB) // _html_escape + MOVD ·_subr__html_escape(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+32(FP) RET diff --git a/internal/native/neon/html_escape_subr_arm64.go b/internal/native/neon/html_escape_subr_arm64.go index 9882fb6ad..a0a86e057 100644 --- a/internal/native/neon/html_escape_subr_arm64.go +++ b/internal/native/neon/html_escape_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __html_escape_entry__() uintptr var ( - _subr__html_escape uintptr = 32 + _subr__html_escape uintptr = __html_escape_entry__() + 32 ) const ( diff --git a/internal/native/neon/i64toa_arm64.s b/internal/native/neon/i64toa_arm64.s index 828e7f73b..8d13a066b 100644 --- a/internal/native/neon/i64toa_arm64.s +++ b/internal/native/neon/i64toa_arm64.s @@ -7,8 +7,9 @@ TEXT ·__i64toa_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _i64toa + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 3, 0x00 lCPI0_0: WORD $0x147b20c5 @@ -964,7 +965,9 @@ _i64toa: MOVD out+0(FP), R0 MOVD val+8(FP), R1 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__i64toa_entry__(SB) // _i64toa + MOVD ·_subr__i64toa(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+16(FP) RET diff --git a/internal/native/neon/i64toa_subr_arm64.go b/internal/native/neon/i64toa_subr_arm64.go index 442305e36..7c73cb380 100644 --- a/internal/native/neon/i64toa_subr_arm64.go +++ b/internal/native/neon/i64toa_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __i64toa_entry__() uintptr var ( - _subr__i64toa uintptr = 48 + _subr__i64toa uintptr = __i64toa_entry__() + 48 ) const ( diff --git a/internal/native/neon/lspace_arm64.s b/internal/native/neon/lspace_arm64.s index c62c6f615..e4e2cd2f8 100644 --- a/internal/native/neon/lspace_arm64.s +++ b/internal/native/neon/lspace_arm64.s @@ -7,8 +7,9 @@ TEXT ·__lspace_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _lspace + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 2, 0x00 _lspace: WORD $0xa9be7bfd // stp fp, lr, [sp, #-32]! @@ -54,7 +55,9 @@ _lspace: MOVD nb+8(FP), R1 MOVD off+16(FP), R2 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__lspace_entry__(SB) // _lspace + MOVD ·_subr__lspace(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+24(FP) RET diff --git a/internal/native/neon/lspace_subr_arm64.go b/internal/native/neon/lspace_subr_arm64.go index 89fbf2d7c..e8dc6d56f 100644 --- a/internal/native/neon/lspace_subr_arm64.go +++ b/internal/native/neon/lspace_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __lspace_entry__() uintptr var ( - _subr__lspace uintptr = 0 + _subr__lspace uintptr = __lspace_entry__() + 0 ) const ( diff --git a/internal/native/neon/native_arm64_test.go b/internal/native/neon/native_arm64_test.go index 278063b4e..07644b519 100644 --- a/internal/native/neon/native_arm64_test.go +++ b/internal/native/neon/native_arm64_test.go @@ -36,17 +36,11 @@ import ( "github.com/stretchr/testify/require" ) -//go:noinline -func BREAK() bool { - return true -} - func TestNative_Value(t *testing.T) { runtime.GC() var v types.JsonState s := ` -12345` p := (*rt.GoString)(unsafe.Pointer(&s)) - BREAK() x := value(p.Ptr, p.Len, 0, &v, 0) assert.Equal(t, 9, x) assert.Equal(t, types.V_INTEGER, v.Vt) diff --git a/internal/native/neon/quote_arm64.s b/internal/native/neon/quote_arm64.s index 61cd7dd52..0092813b4 100644 --- a/internal/native/neon/quote_arm64.s +++ b/internal/native/neon/quote_arm64.s @@ -7,8 +7,9 @@ TEXT ·__quote_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _quote + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x08040201 @@ -2546,7 +2547,9 @@ _quote: MOVD dn+24(FP), R3 MOVD flags+32(FP), R4 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__quote_entry__(SB) // _quote + MOVD ·_subr__quote(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+40(FP) RET diff --git a/internal/native/neon/quote_subr_arm64.go b/internal/native/neon/quote_subr_arm64.go index 6f7a8162d..e9d942880 100644 --- a/internal/native/neon/quote_subr_arm64.go +++ b/internal/native/neon/quote_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __quote_entry__() uintptr var ( - _subr__quote uintptr = 32 + _subr__quote uintptr = __quote_entry__() + 32 ) const ( diff --git a/internal/native/neon/skip_array_arm64.s b/internal/native/neon/skip_array_arm64.s index 9e258625a..21a9afcdd 100644 --- a/internal/native/neon/skip_array_arm64.s +++ b/internal/native/neon/skip_array_arm64.s @@ -7,8 +7,9 @@ TEXT ·__skip_array_entry__(SB), NOSPLIT, $96 NO_LOCAL_POINTERS - WORD $0x9101c3ff // add sp, sp, #112 - JMP _skip_array + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x00000001; WORD $0x00000000 // .quad 1 @@ -2370,7 +2371,9 @@ _skip_array: MOVD m+16(FP), R2 MOVD flags+24(FP), R3 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__skip_array_entry__(SB) // _skip_array + MOVD ·_subr__skip_array(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+32(FP) RET diff --git a/internal/native/neon/skip_array_subr_arm64.go b/internal/native/neon/skip_array_subr_arm64.go index 96ce7fdfc..c011f6ffd 100644 --- a/internal/native/neon/skip_array_subr_arm64.go +++ b/internal/native/neon/skip_array_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __skip_array_entry__() uintptr var ( - _subr__skip_array uintptr = 48 + _subr__skip_array uintptr = __skip_array_entry__() + 48 ) const ( diff --git a/internal/native/neon/skip_number_arm64.s b/internal/native/neon/skip_number_arm64.s index af72ff40c..bb590ca3d 100644 --- a/internal/native/neon/skip_number_arm64.s +++ b/internal/native/neon/skip_number_arm64.s @@ -7,8 +7,9 @@ TEXT ·__skip_number_entry__(SB), NOSPLIT, $32 NO_LOCAL_POINTERS - WORD $0x9100c3ff // add sp, sp, #48 - JMP _skip_number + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x08040201 @@ -361,7 +362,9 @@ _skip_number: MOVD s+0(FP), R0 MOVD p+8(FP), R1 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__skip_number_entry__(SB) // _skip_number + MOVD ·_subr__skip_number(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+16(FP) RET diff --git a/internal/native/neon/skip_number_subr_arm64.go b/internal/native/neon/skip_number_subr_arm64.go index e0ff6f5b1..d2ba7d934 100644 --- a/internal/native/neon/skip_number_subr_arm64.go +++ b/internal/native/neon/skip_number_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __skip_number_entry__() uintptr var ( - _subr__skip_number uintptr = 32 + _subr__skip_number uintptr = __skip_number_entry__() + 32 ) const ( diff --git a/internal/native/neon/skip_object_arm64.s b/internal/native/neon/skip_object_arm64.s index eb3cd036d..8de33e475 100644 --- a/internal/native/neon/skip_object_arm64.s +++ b/internal/native/neon/skip_object_arm64.s @@ -7,8 +7,9 @@ TEXT ·__skip_object_entry__(SB), NOSPLIT, $96 NO_LOCAL_POINTERS - WORD $0x9101c3ff // add sp, sp, #112 - JMP _skip_object + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x00000001; WORD $0x00000000 // .quad 1 @@ -2370,7 +2371,9 @@ _skip_object: MOVD m+16(FP), R2 MOVD flags+24(FP), R3 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__skip_object_entry__(SB) // _skip_object + MOVD ·_subr__skip_object(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+32(FP) RET diff --git a/internal/native/neon/skip_object_subr_arm64.go b/internal/native/neon/skip_object_subr_arm64.go index e526aeb93..ab7ab2034 100644 --- a/internal/native/neon/skip_object_subr_arm64.go +++ b/internal/native/neon/skip_object_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __skip_object_entry__() uintptr var ( - _subr__skip_object uintptr = 48 + _subr__skip_object uintptr = __skip_object_entry__() + 48 ) const ( diff --git a/internal/native/neon/skip_one_arm64.s b/internal/native/neon/skip_one_arm64.s index 694c38ec4..3b65cba3f 100644 --- a/internal/native/neon/skip_one_arm64.s +++ b/internal/native/neon/skip_one_arm64.s @@ -7,8 +7,9 @@ TEXT ·__skip_one_entry__(SB), NOSPLIT, $96 NO_LOCAL_POINTERS - WORD $0x9101c3ff // add sp, sp, #112 - JMP _skip_one + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x00000001; WORD $0x00000000 // .quad 1 @@ -2370,7 +2371,9 @@ _skip_one: MOVD m+16(FP), R2 MOVD flags+24(FP), R3 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__skip_one_entry__(SB) // _skip_one + MOVD ·_subr__skip_one(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+32(FP) RET diff --git a/internal/native/neon/skip_one_fast_arm64.s b/internal/native/neon/skip_one_fast_arm64.s index 47f883d50..550166c1f 100644 --- a/internal/native/neon/skip_one_fast_arm64.s +++ b/internal/native/neon/skip_one_fast_arm64.s @@ -7,8 +7,9 @@ TEXT ·__skip_one_fast_entry__(SB), NOSPLIT, $176 NO_LOCAL_POINTERS - WORD $0x910303ff // add sp, sp, #192 - JMP _skip_one_fast + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x08040201 @@ -986,7 +987,9 @@ _skip_one_fast: MOVD s+0(FP), R0 MOVD p+8(FP), R1 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__skip_one_fast_entry__(SB) // _skip_one_fast + MOVD ·_subr__skip_one_fast(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+16(FP) RET diff --git a/internal/native/neon/skip_one_fast_subr_arm64.go b/internal/native/neon/skip_one_fast_subr_arm64.go index b4c2f5d07..4801fa11d 100644 --- a/internal/native/neon/skip_one_fast_subr_arm64.go +++ b/internal/native/neon/skip_one_fast_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __skip_one_fast_entry__() uintptr var ( - _subr__skip_one_fast uintptr = 32 + _subr__skip_one_fast uintptr = __skip_one_fast_entry__() + 32 ) const ( diff --git a/internal/native/neon/skip_one_subr_arm64.go b/internal/native/neon/skip_one_subr_arm64.go index 87c40c784..01b275681 100644 --- a/internal/native/neon/skip_one_subr_arm64.go +++ b/internal/native/neon/skip_one_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __skip_one_entry__() uintptr var ( - _subr__skip_one uintptr = 48 + _subr__skip_one uintptr = __skip_one_entry__() + 48 ) const ( diff --git a/internal/native/neon/u64toa_arm64.s b/internal/native/neon/u64toa_arm64.s index da0f7018c..1b280da77 100644 --- a/internal/native/neon/u64toa_arm64.s +++ b/internal/native/neon/u64toa_arm64.s @@ -7,8 +7,9 @@ TEXT ·__u64toa_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _u64toa + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 3, 0x00 lCPI0_0: WORD $0x147b20c5 @@ -581,7 +582,9 @@ _u64toa: MOVD out+0(FP), R0 MOVD val+8(FP), R1 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__u64toa_entry__(SB) // _u64toa + MOVD ·_subr__u64toa(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+16(FP) RET diff --git a/internal/native/neon/u64toa_subr_arm64.go b/internal/native/neon/u64toa_subr_arm64.go index 3cd0f3d7d..151794d52 100644 --- a/internal/native/neon/u64toa_subr_arm64.go +++ b/internal/native/neon/u64toa_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __u64toa_entry__() uintptr var ( - _subr__u64toa uintptr = 48 + _subr__u64toa uintptr = __u64toa_entry__() + 48 ) const ( diff --git a/internal/native/neon/unquote_arm64.s b/internal/native/neon/unquote_arm64.s index c58cf0052..e43c4de31 100644 --- a/internal/native/neon/unquote_arm64.s +++ b/internal/native/neon/unquote_arm64.s @@ -7,8 +7,9 @@ TEXT ·__unquote_entry__(SB), NOSPLIT, $96 NO_LOCAL_POINTERS - WORD $0x9101c3ff // add sp, sp, #112 - JMP _unquote + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x08040201 @@ -601,7 +602,9 @@ _unquote: MOVD ep+24(FP), R3 MOVD flags+32(FP), R4 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__unquote_entry__(SB) // _unquote + MOVD ·_subr__unquote(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+40(FP) RET diff --git a/internal/native/neon/unquote_subr_arm64.go b/internal/native/neon/unquote_subr_arm64.go index 7f5c3e37b..040e15a3d 100644 --- a/internal/native/neon/unquote_subr_arm64.go +++ b/internal/native/neon/unquote_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __unquote_entry__() uintptr var ( - _subr__unquote uintptr = 32 + _subr__unquote uintptr = __unquote_entry__() + 32 ) const ( diff --git a/internal/native/neon/validate_one_arm64.s b/internal/native/neon/validate_one_arm64.s index 088b02e90..42a34ace3 100644 --- a/internal/native/neon/validate_one_arm64.s +++ b/internal/native/neon/validate_one_arm64.s @@ -7,8 +7,9 @@ TEXT ·__validate_one_entry__(SB), NOSPLIT, $96 NO_LOCAL_POINTERS - WORD $0x9101c3ff // add sp, sp, #112 - JMP _validate_one + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x00000001; WORD $0x00000000 // .quad 1 @@ -1846,7 +1847,9 @@ _validate_one: MOVD p+8(FP), R1 MOVD m+16(FP), R2 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__validate_one_entry__(SB) // _validate_one + MOVD ·_subr__validate_one(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+24(FP) RET diff --git a/internal/native/neon/validate_one_subr_arm64.go b/internal/native/neon/validate_one_subr_arm64.go index 2fa1a5215..cbebeb08b 100644 --- a/internal/native/neon/validate_one_subr_arm64.go +++ b/internal/native/neon/validate_one_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __validate_one_entry__() uintptr var ( - _subr__validate_one uintptr = 48 + _subr__validate_one uintptr = __validate_one_entry__() + 48 ) const ( diff --git a/internal/native/neon/validate_utf8_arm64.s b/internal/native/neon/validate_utf8_arm64.s index f87c1eaac..9398beaa7 100644 --- a/internal/native/neon/validate_utf8_arm64.s +++ b/internal/native/neon/validate_utf8_arm64.s @@ -7,8 +7,9 @@ TEXT ·__validate_utf8_entry__(SB), NOSPLIT, $48 NO_LOCAL_POINTERS - WORD $0x910103ff // add sp, sp, #64 - JMP _validate_utf8 + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 2, 0x00 _validate_utf8: WORD $0xd10103ff // sub sp, sp, #64 @@ -210,7 +211,9 @@ _validate_utf8: MOVD p+8(FP), R1 MOVD m+16(FP), R2 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__validate_utf8_entry__(SB) // _validate_utf8 + MOVD ·_subr__validate_utf8(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+24(FP) RET diff --git a/internal/native/neon/validate_utf8_fast_arm64.s b/internal/native/neon/validate_utf8_fast_arm64.s index 95da166eb..a73dd7660 100644 --- a/internal/native/neon/validate_utf8_fast_arm64.s +++ b/internal/native/neon/validate_utf8_fast_arm64.s @@ -7,8 +7,9 @@ TEXT ·__validate_utf8_fast_entry__(SB), NOSPLIT, $32 NO_LOCAL_POINTERS - WORD $0x9100c3ff // add sp, sp, #48 - JMP _validate_utf8_fast + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 2, 0x00 _validate_utf8_fast: WORD $0xd100c3ff // sub sp, sp, #48 @@ -170,7 +171,9 @@ _entry: _validate_utf8_fast: MOVD s+0(FP), R0 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__validate_utf8_fast_entry__(SB) // _validate_utf8_fast + MOVD ·_subr__validate_utf8_fast(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+8(FP) RET diff --git a/internal/native/neon/validate_utf8_fast_subr_arm64.go b/internal/native/neon/validate_utf8_fast_subr_arm64.go index a127e874a..c5fd3ba5d 100644 --- a/internal/native/neon/validate_utf8_fast_subr_arm64.go +++ b/internal/native/neon/validate_utf8_fast_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __validate_utf8_fast_entry__() uintptr var ( - _subr__validate_utf8_fast uintptr = 0 + _subr__validate_utf8_fast uintptr = __validate_utf8_fast_entry__() + 0 ) const ( diff --git a/internal/native/neon/validate_utf8_subr_arm64.go b/internal/native/neon/validate_utf8_subr_arm64.go index ac8a68285..b79f6e852 100644 --- a/internal/native/neon/validate_utf8_subr_arm64.go +++ b/internal/native/neon/validate_utf8_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __validate_utf8_entry__() uintptr var ( - _subr__validate_utf8 uintptr = 0 + _subr__validate_utf8 uintptr = __validate_utf8_entry__() + 0 ) const ( diff --git a/internal/native/neon/value_arm64.s b/internal/native/neon/value_arm64.s index a6cf69959..ea75ef717 100644 --- a/internal/native/neon/value_arm64.s +++ b/internal/native/neon/value_arm64.s @@ -7,8 +7,9 @@ TEXT ·__value_entry__(SB), NOSPLIT, $96 NO_LOCAL_POINTERS - WORD $0x9101c3ff // add sp, sp, #112 - JMP _value + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x08040201 @@ -5954,7 +5955,9 @@ _value: MOVD v+24(FP), R3 MOVD flags+32(FP), R4 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__value_entry__(SB) // _value + MOVD ·_subr__value(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] MOVD R0, ret+40(FP) RET diff --git a/internal/native/neon/value_subr_arm64.go b/internal/native/neon/value_subr_arm64.go index 238bcf742..df885edce 100644 --- a/internal/native/neon/value_subr_arm64.go +++ b/internal/native/neon/value_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __value_entry__() uintptr var ( - _subr__value uintptr = 32 + _subr__value uintptr = __value_entry__() + 32 ) const ( diff --git a/internal/native/neon/vnumber_arm64.s b/internal/native/neon/vnumber_arm64.s index a531a49ad..6d0863820 100644 --- a/internal/native/neon/vnumber_arm64.s +++ b/internal/native/neon/vnumber_arm64.s @@ -7,8 +7,9 @@ TEXT ·__vnumber_entry__(SB), NOSPLIT, $96 NO_LOCAL_POINTERS - WORD $0x9101c3ff // add sp, sp, #112 - JMP _vnumber + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 2, 0x00 _vnumber: WORD $0xa9b96ffc // stp x28, x27, [sp, #-112]! @@ -4677,7 +4678,9 @@ _vnumber: MOVD p+8(FP), R1 MOVD v+16(FP), R2 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__vnumber_entry__(SB) // _vnumber + MOVD ·_subr__vnumber(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] RET diff --git a/internal/native/neon/vnumber_subr_arm64.go b/internal/native/neon/vnumber_subr_arm64.go index 2f8086dc1..78dd2c313 100644 --- a/internal/native/neon/vnumber_subr_arm64.go +++ b/internal/native/neon/vnumber_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __vnumber_entry__() uintptr var ( - _subr__vnumber uintptr = 0 + _subr__vnumber uintptr = __vnumber_entry__() + 0 ) const ( diff --git a/internal/native/neon/vsigned_arm64.s b/internal/native/neon/vsigned_arm64.s index 8b8acd0c4..db1603647 100644 --- a/internal/native/neon/vsigned_arm64.s +++ b/internal/native/neon/vsigned_arm64.s @@ -7,8 +7,9 @@ TEXT ·__vsigned_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _vsigned + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 2, 0x00 _vsigned: WORD $0xa9be7bfd // stp fp, lr, [sp, #-32]! @@ -138,7 +139,9 @@ _vsigned: MOVD p+8(FP), R1 MOVD v+16(FP), R2 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__vsigned_entry__(SB) // _vsigned + MOVD ·_subr__vsigned(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] RET diff --git a/internal/native/neon/vsigned_subr_arm64.go b/internal/native/neon/vsigned_subr_arm64.go index b436af3da..902bd4dc8 100644 --- a/internal/native/neon/vsigned_subr_arm64.go +++ b/internal/native/neon/vsigned_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __vsigned_entry__() uintptr var ( - _subr__vsigned uintptr = 0 + _subr__vsigned uintptr = __vsigned_entry__() + 0 ) const ( diff --git a/internal/native/neon/vstring_arm64.s b/internal/native/neon/vstring_arm64.s index 9c259c448..f94d5b2de 100644 --- a/internal/native/neon/vstring_arm64.s +++ b/internal/native/neon/vstring_arm64.s @@ -7,8 +7,9 @@ TEXT ·__vstring_entry__(SB), NOSPLIT, $32 NO_LOCAL_POINTERS - WORD $0x9100c3ff // add sp, sp, #48 - JMP _vstring + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 4, 0x00 lCPI0_0: WORD $0x08040201 @@ -632,7 +633,9 @@ _vstring: MOVD v+16(FP), R2 MOVD flags+24(FP), R3 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__vstring_entry__(SB) // _vstring + MOVD ·_subr__vstring(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] RET diff --git a/internal/native/neon/vstring_subr_arm64.go b/internal/native/neon/vstring_subr_arm64.go index aa3e88709..0b4a1cf7d 100644 --- a/internal/native/neon/vstring_subr_arm64.go +++ b/internal/native/neon/vstring_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __vstring_entry__() uintptr var ( - _subr__vstring uintptr = 32 + _subr__vstring uintptr = __vstring_entry__() + 32 ) const ( diff --git a/internal/native/neon/vunsigned_arm64.s b/internal/native/neon/vunsigned_arm64.s index b3c1f3fb3..a51083e98 100644 --- a/internal/native/neon/vunsigned_arm64.s +++ b/internal/native/neon/vunsigned_arm64.s @@ -7,8 +7,9 @@ TEXT ·__vunsigned_entry__(SB), NOSPLIT, $16 NO_LOCAL_POINTERS - WORD $0x910083ff // add sp, sp, #32 - JMP _vunsigned + WORD $0x100000a0 // adr x0, .+20 + MOVD R0, ret(FP) + RET // .p2align 2, 0x00 _vunsigned: WORD $0xa9be7bfd // stp fp, lr, [sp, #-32]! @@ -132,7 +133,9 @@ _vunsigned: MOVD p+8(FP), R1 MOVD v+16(FP), R2 WORD $0xf90007fc // str x28, [sp, #8] - CALL ·__vunsigned_entry__(SB) // _vunsigned + MOVD ·_subr__vunsigned(SB), R11 + WORD $0x1000005e // adr x30, .+8 + JMP (R11) WORD $0xf94007fc // ldr x28, [sp, #8] RET diff --git a/internal/native/neon/vunsigned_subr_arm64.go b/internal/native/neon/vunsigned_subr_arm64.go index f32d6af5a..a78f3f961 100644 --- a/internal/native/neon/vunsigned_subr_arm64.go +++ b/internal/native/neon/vunsigned_subr_arm64.go @@ -9,7 +9,7 @@ package neon func __vunsigned_entry__() uintptr var ( - _subr__vunsigned uintptr = 0 + _subr__vunsigned uintptr = __vunsigned_entry__() + 0 ) const ( diff --git a/scripts/build-arm.sh b/scripts/build-arm.sh index 4f3553940..753a4ffe1 100644 --- a/scripts/build-arm.sh +++ b/scripts/build-arm.sh @@ -5,6 +5,11 @@ SRC_DIR="native/arm" TMP_DIR="output/arm" OUT_DIR="internal/native/neon" TOOL_DIR="tools" +CC=clang +if [ "$1" != "" ]; then + CC=$1 +fi +echo $CC # Create the output directory if it doesn't exist mkdir -p "$TMP_DIR" @@ -19,7 +24,7 @@ for src_file in "$SRC_DIR"/*.c; do asm_file="$TMP_DIR/${base_name}.s" # Compile the source file into an assembly file - clang -Wno-error -Wno-nullability-completeness -mllvm=--go-frame -mllvm=-enable-shrink-wrap=0 -target aarch64-apple-macos11 -march=armv8-a+simd -Itools/simde/simde -mno-red-zone -fno-asynchronous-unwind-tables -fno-builtin -fno-exceptions -fno-rtti -fno-stack-protector -nostdlib -O3 -mno-red-zone -fno-asynchronous-unwind-tables -fno-builtin -fno-exceptions -fno-rtti -fno-stack-protector -nostdlib -S -o "$asm_file" "$src_file" + $CC -Wno-error -Wno-nullability-completeness -mllvm=--go-frame -mllvm=-enable-shrink-wrap=0 -target aarch64-apple-macos11 -march=armv8-a+simd -Itools/simde/simde -mno-red-zone -fno-asynchronous-unwind-tables -fno-builtin -fno-exceptions -fno-rtti -fno-stack-protector -nostdlib -O3 -mno-red-zone -fno-asynchronous-unwind-tables -fno-builtin -fno-exceptions -fno-rtti -fno-stack-protector -nostdlib -S -o "$asm_file" "$src_file" # Execute asm2asm tool python3 ${TOOL_DIR}/asm2arm/arm.py ${OUT_DIR}/${base_name}_arm64.go $asm_file diff --git a/tools/asm2arm/arm.py b/tools/asm2arm/arm.py index e4a5ea8d7..32caa957c 100644 --- a/tools/asm2arm/arm.py +++ b/tools/asm2arm/arm.py @@ -2018,8 +2018,11 @@ def _declare_body(self, name: str): gosize = 0 if size < 16 else size-16 self.out.append('TEXT ·_%s_entry__(SB), NOSPLIT, $%d' % (name, gosize)) self.out.append('\tNO_LOCAL_POINTERS') - self.out.append('\t'+Instruction('add sp, sp, #%d' % size).encoded) - self.out.append('\tJMP %s' % name) + # get current PC + self.out.append('\tWORD $0x100000a0 // adr x0, .+20') + # self.out.append('\t'+Instruction('add sp, sp, #%d' % size).encoded) + self.out.append('\tMOVD R0, ret(FP)') + self.out.append('\tRET') self._LE_4bytes_IntIntr_2_RawIntr() self._reloc() @@ -2049,11 +2052,6 @@ def _declare_function(self, name: str, proto: Prototype): addr = self.code.get(subr) self.subr[subr] = addr size = self.code.stacksize(subr) - - # Notice: golang will insert 3 instructions when stacksize > 0. - # And since we use PCALIGN 16, thus we add 16 bytes PC - if size > 0: - addr += 16 m_size = size + 64 # rsp_sub_size = size + 16 @@ -2100,33 +2098,18 @@ def _declare_function(self, name: str, proto: Prototype): op, reg = REG_MAP[arg.creg.reg] self.out.append('\t%s %s+%d(FP), %s' % (op, arg.name, offs - arg.size, reg)) - # the function starts at zero - if addr == 0 and proto.retv is None: - raise RuntimeError("UNIMPLEMENT FUNC: %s" % name) - self.out.append('\tJMP ·%s(SB) // %s' % (STUB_NAME, subr)) # Go ASM completely ignores the offset of the JMP instruction, # so we need to use indirect jumps instead for tail-call elimination - # raise RuntimeError("UNIMPLEMENT FUNC: %s" % name) - # self.out.append('\tLEAQ ·%s+%d(SB), AX // %s' % (STUB_NAME, addr, subr)) - # self.out.append('\tJMP AX') - # save LR(x30) and Frame Pointer(x29) - - # self.out.append('\tADD $%d, RSP, RSP' % size) - # self.out.append('\tSTP.W (R29, R30), -16(RSP)') - # self.out.append('\tMOVD RSP, R29') - # self.out.append('\tSUB $16, RSP') - # self.out.append('\tADD $%d, RSP' % (size + 32)) self.out.append('\tWORD $0xf90007fc // str x28, [sp, #8]') - self.out.append('\tCALL ·_%s_entry__(SB) // %s' % (subr, subr)) + self.out.append('\tMOVD ·_subr_%s(SB), R11' % (subr)) + self.out.append('\tWORD $0x1000005e // adr x30, .+8') + self.out.append('\tJMP (R11)') + # self.out.append('\tCALL ·_%s_entry__(SB) // %s' % (subr, subr)) self.out.append('\tWORD $0xf94007fc // ldr x28, [sp, #8]') - # self.out.append('\tSUB $%d, RSP' % (size + 32)) - # self.out.append('\tADD $16, RSP') - # self.out.append('\tLDP -8(RSP), (R29, R30)') - # self.out.append('\tADD $16, RSP') - # self.out.append('\tLDP.P 16(RSP), (R29, R30)') - # self.out.append('\tSUB $%d, RSP, RSP' % size) + + # Restore LR and Frame Pointer @@ -2363,8 +2346,8 @@ def main(): print('var (', file = fp) mlen = max(len(s) for s in asm.subr) for name, entry in asm.subr.items(): - # print(' _subr_%s uintptr = _%s_entry__() + %d' % (name.ljust(mlen, ' '), name, entry), file = fp) - print(' _subr_%s uintptr = %d' % (name.ljust(mlen, ' '), entry), file = fp) + print(' _subr_%s uintptr = _%s_entry__() + %d' % (name.ljust(mlen, ' '), name, entry), file = fp) + # print(' _subr_%s uintptr = %d' % (name.ljust(mlen, ' '), entry), file = fp) print(')', file = fp) # dump max stack depth for exported functions