/*
* Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
 * A reasonably well-optimized bzero/memset. Should work equally well on
 * ARM11- and ARM9-based cores.
 *
 * The algorithm is to align the destination pointer on a 32-byte boundary
 * and then blast out the data 64 bytes at a time, in two 32-byte stores
 * per loop iteration.
 */
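/*
 * For orientation, the control flow below corresponds roughly to this C
 * sketch (added for illustration; the function name is hypothetical, and
 * the real code uses conditional stores rather than byte loops for the
 * alignment and tail phases):
 *
 *     #include <stddef.h>
 *     #include <stdint.h>
 *
 *     void *memset_sketch(void *dst, int c, size_t len)
 *     {
 *         unsigned char *p = dst;
 *         uint32_t w = (uint8_t)c;
 *         w |= w << 8;                       // splat c into all four bytes
 *         w |= w << 16;
 *         if (len < 32) {                    // short case: bytewise stores
 *             while (len--) *p++ = (uint8_t)w;
 *             return dst;
 *         }
 *         while ((uintptr_t)p & 0x1f) {      // align to a 32-byte boundary
 *             *p++ = (uint8_t)w; len--;
 *         }
 *         while (len >= 64) {                // main loop: 64 bytes per pass
 *             for (int i = 0; i < 16; i++) ((uint32_t *)p)[i] = w;
 *             p += 64; len -= 64;
 *         }
 *         while (len--) *p++ = (uint8_t)w;   // 0-63 byte tail
 *         return dst;
 *     }
 */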
        .text
        .align 2

        .globl memset
/* void *memset(void *ptr, int c, size_t len); */
memset:
        /* park len in r3, unpack c into every byte of r2, then move len into r1 */
        mov     r3, r2
        and     r1, r1, #0xff
        orr     r1, r1, r1, lsl #8
        orr     r2, r1, r1, lsl #16
        mov     r1, r3
        b       Lbzeroengine
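/*
 * In C terms, the three unpacking instructions above compute (illustrative
 * only):
 *
 *     uint32_t w = c & 0xff;   // and r1, r1, #0xff        -> 0x000000cc
 *     w |= w << 8;             // orr r1, r1, r1, lsl #8   -> 0x0000cccc
 *     w |= w << 16;            // orr r2, r1, r1, lsl #16  -> 0xcccccccc
 *
 * so a single word store writes four copies of the fill byte at once.
 */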
        .globl bzero
/* void bzero(void *ptr, size_t len); */
bzero:
        /* zero out r2 so we can be just like memset(0) */
        mov     r2, #0
Lbzeroengine:
        /* move the base pointer into r12 and leave r0 alone so that we return the original pointer */
        mov     r12, r0
        /* copy r2 into r3 for 64-bit stores */
        mov     r3, r2
        /* check for zero len */
        cmp     r1, #0
        bxeq    lr
        /* fall back to a bytewise store for less than 32 bytes */
        cmp     r1, #32
        blt     L_bytewise
        /* check for a 32-byte-unaligned ptr */
        tst     r12, #0x1f
        bne     L_unaligned
        /* make sure we have at least 64 bytes to zero */
        cmp     r1, #64
        blt     L_lessthan64aligned
/* >= 64 bytes of len, 32-byte aligned */
L_64ormorealigned:
        /* we need some registers; avoid r7 (frame pointer) and r9 (thread register) */
        stmfd   sp!, { r4-r6, r8, r10-r11 }
        mov     r4, r2
        mov     r5, r2
        mov     r6, r2
        mov     r8, r2
        mov     r10, r2
        mov     r11, r2
        /* pre-subtract 64 from the len to avoid an extra compare in the loop */
        sub     r1, r1, #64
L_64loop:
        stmia   r12!, { r2-r6, r8, r10-r11 }
        subs    r1, r1, #64
        stmia   r12!, { r2-r6, r8, r10-r11 }
        bge     L_64loop
        /* restore the saved regs */
        ldmfd   sp!, { r4-r6, r8, r10-r11 }
        /* check for completion (we had previously subtracted an extra 64 from len) */
        adds    r1, r1, #64
        bxeq    lr
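/*
 * The loop accounting above, in C terms (illustrative): len is biased by
 * -64 before entry so each iteration needs only one flag-setting subtract,
 * and the subs is scheduled between the two block stores:
 *
 *     len -= 64;                           // sub: pre-bias
 *     do {
 *         // two stmia of 8 registers = 2 x 32 bytes
 *         for (int i = 0; i < 16; i++) ((uint32_t *)p)[i] = w;
 *         p += 64;
 *         len -= 64;                       // subs: sets flags for bge
 *     } while ((int32_t)len >= 0);
 *     len += 64;                           // undo the bias; 0-63 bytes remain
 */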
L_lessthan64aligned:
        /* do we have 16 or more bytes left? */
        cmp     r1, #16
        stmgeia r12!, { r2-r3 }
        stmgeia r12!, { r2-r3 }
        subges  r1, r1, #16
        bgt     L_lessthan64aligned
        bxeq    lr
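/*
 * Note that when fewer than 16 bytes remain, the ge-conditional stores and
 * subtract above simply do not execute, so control falls through to the
 * 0-15 byte tail without needing a separate exit branch.
 */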
L_lessthan16aligned:
        /* store 0 to 15 bytes */
        mov     r1, r1, lsl #28      /* move the remaining len bits [3:0] to the flags area of cpsr */
        msr     cpsr_f, r1
        stmmiia r12!, { r2-r3 }      /* n is set, store 8 bytes */
        streq   r2, [r12], #4        /* z is set, store 4 bytes */
        strcsh  r2, [r12], #2        /* c is set, store 2 bytes */
        strvsb  r2, [r12], #1        /* v is set, store 1 byte */
        bx      lr
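/*
 * The tail above avoids compares entirely: shifting len left by 28 parks
 * its low four bits in the N, Z, C and V positions of cpsr, so after one
 * msr each power-of-two remainder is handled by a single conditional store:
 *
 *     len bit 3 (8s) -> N -> stmmiia   stores 8 bytes
 *     len bit 2 (4s) -> Z -> streq     stores 4 bytes
 *     len bit 1 (2s) -> C -> strcsh    stores 2 bytes
 *     len bit 0 (1s) -> V -> strvsb    stores 1 byte
 *
 * Equivalent C (illustrative): if (len & 8) ..., if (len & 4) ..., and so on.
 */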
L_bytewise:
        /* bytewise store, 2 bytes at a time, alignment not guaranteed */
        subs    r1, r1, #2
        strb    r2, [r12], #1
        strplb  r2, [r12], #1
        bhi     L_bytewise
        bx      lr
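/*
 * In C terms, the bytewise loop above is (illustrative; one subs drives
 * both the conditional second store and the loop branch, and lengths here
 * are far below 2^31, so the signed tests match pl and hi):
 *
 *     do {
 *         len -= 2;                          // subs
 *         *p++ = (uint8_t)w;                 // first byte, unconditional
 *         if ((int32_t)len >= 0)             // strplb: a second byte remained
 *             *p++ = (uint8_t)w;
 *     } while ((int32_t)len > 0);            // bhi: at least 1 byte still left
 */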
L_unaligned:
        /* unaligned on a 32-byte boundary: store 1-15 bytes until we're 16-byte aligned */
        mov     r3, r12, lsl #28
        rsb     r3, r3, #0x00000000
        msr     cpsr_f, r3
        strvsb  r2, [r12], #1        /* v is set, unaligned in the 1s column */
        strcsh  r2, [r12], #2        /* c is set, unaligned in the 2s column */
        streq   r2, [r12], #4        /* z is set, unaligned in the 4s column */
        strmi   r2, [r12], #4        /* n is set, unaligned in the 8s column */
        strmi   r2, [r12], #4
        subs    r1, r1, r3, lsr #28
        bxeq    lr
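/*
 * The fix-up above: shifting the pointer's low four bits to the top of r3
 * and negating turns the top nibble of r3 into (16 - (ptr & 0xf)) & 0xf,
 * the byte count needed to reach the next 16-byte boundary. Feeding that
 * nibble into the flags reuses the same NZCV mapping as the tail, with each
 * condition storing the piece that clears the matching address bit
 * (illustrative C):
 *
 *     size_t fix = (16 - ((uintptr_t)p & 0xf)) & 0xf;         // rsb / lsr #28
 *     if (fix & 1) { *p++ = (uint8_t)w; }                     // strvsb
 *     if (fix & 2) { *(uint16_t *)p = (uint16_t)w; p += 2; }  // strcsh
 *     if (fix & 4) { *(uint32_t *)p = w; p += 4; }            // streq
 *     if (fix & 8) { ((uint32_t *)p)[0] = w;                  // strmi x2
 *                    ((uint32_t *)p)[1] = w; p += 8; }
 *     len -= fix;                                             // subs; done if 0
 */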
        /* we had previously trashed r3; restore it */
        mov     r3, r2
        /* now make sure we're 32-byte aligned */
        tst     r12, #(1 << 4)
        stmneia r12!, { r2-r3 }
        stmneia r12!, { r2-r3 }
        subnes  r1, r1, #16
        /* we're now aligned; check for >= 64 bytes left */
        cmp     r1, #64
        bge     L_64ormorealigned
        b       L_lessthan64aligned
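/*
 * A minimal sanity-check harness, as a sketch: it assumes this file is
 * assembled for an ARM target and linked so that the memset and bzero
 * symbols above are the ones resolved. It sweeps every alignment and a
 * range of lengths, checking for underruns and overruns:
 *
 *     #include <assert.h>
 *     #include <string.h>
 *     #include <strings.h>
 *
 *     int main(void)
 *     {
 *         unsigned char buf[256];
 *         for (size_t off = 0; off < 32; off++)        // every alignment
 *             for (size_t len = 0; len <= 96; len++) { // short, mid, long paths
 *                 memset(buf, 0xa5, sizeof buf);
 *                 bzero(buf + off, len);
 *                 for (size_t i = 0; i < len; i++)
 *                     assert(buf[off + i] == 0);       // region was zeroed
 *                 assert(off == 0 || buf[off - 1] == 0xa5);  // no underrun
 *                 assert(buf[off + len] == 0xa5);      // no overrun
 *             }
 *         return 0;
 *     }
 */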