// Inferno's libkern/memmove-arm.s
// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s
//
//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
//         Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// Register aliases.  Several aliases deliberately name the same register;
// the notes beside each group say which pairs may be live at the same time.

// TE or TS are spilled to the stack during bulk register moves.
TS = 0					/* "to" start: current destination pointer when copying forward */
TE = 8					/* "to" end: current destination pointer when copying backward */

// Warning: the linker will use R11 to synthesize certain instructions. Please
// take care and double check with objdump.
FROM = 11				/* current source pointer */
N = 12					/* byte count; only read before TMP is first written */
TMP = 12				/* N and TMP don't overlap */
TMP1 = 5				/* word-at-a-time scratch for the aligned tail loops */

// Parameters of the unaligned copy, chosen from the source misalignment.
RSHIFT = 5
LSHIFT = 6
OFFSET = 7

// Backward unaligned copy: BRn are the words read, BWn the words written.
BR0 = 0					/* shared with TS */
BW0 = 1
BR1 = 1
BW1 = 2
BR2 = 2
BW2 = 3
BR3 = 3
BW3 = 4

// Forward unaligned copy: FRn are the words read, FWn the words written.
FW0 = 1
FR0 = 2
FW1 = 2
FR1 = 3
FW2 = 3
FR2 = 4
FW3 = 4
FR3 = 8					/* shared with TE */

// runtime·memmove(to, from, n) copies n bytes from "from" to "to".
//
// Arguments are read from to+0(FP), from+4(FP) and n+8(FP); "to" is reloaded
// into R0 before returning.
//
// The copy runs forward when to <= from (unsigned) and backward otherwise.
// In each direction the destination is first byte-copied up to a 4-byte
// boundary.  If the source is then word aligned too, data moves in 32-byte
// multi-register blocks, then single words, then single bytes.  If the source
// is not word aligned, whole source words are loaded and recombined with
// shifts, 16 bytes per iteration, before the byte tail finishes the copy.
//
// The multi-register moves overwrite R0 (TS) on the backward paths and R8 (TE)
// on the forward paths, so the bound that is about to be clobbered is parked
// in the single 4-byte local declared by the frame size.  Locals are addressed
// at negative offsets from the pseudo stack pointer, hence -4(SP); a positive
// offset would refer to memory outside this function's frame.
TEXT runtime·memmove(SB), 7, $4
_memmove:
	MOVW	to+0(FP), R(TS)
	MOVW	from+4(FP), R(FROM)
	MOVW	n+8(FP), R(N)

	ADD	R(N), R(TS), R(TE)	/* to end pointer */

	CMP	R(FROM), R(TS)
	BLS	_forward		/* to <= from: ascending copy */

_back:
	ADD	R(N), R(FROM)		/* from end pointer */
	CMP	$4, R(N)		/* need at least 4 bytes to copy */
	BLT	_b1tail

_b4align:				/* align destination on 4 */
	AND.S	$3, R(TE), R(TMP)
	BEQ	_b4aligned

	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed */
	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed */
	B	_b4align

_b4aligned:				/* is source now aligned? */
	AND.S	$3, R(FROM), R(TMP)
	BNE	_bunaligned

	ADD	$31, R(TS), R(TMP)	/* do 32-byte chunks if possible */
	MOVW	R(TS), savedts-4(SP)	/* R0 is overwritten by the block loads below */
_b32loop:
	CMP	R(TMP), R(TE)
	BLS	_b4tail

	MOVM.DB.W (R(FROM)), [R0-R7]
	MOVM.DB.W [R0-R7], (R(TE))
	B	_b32loop

_b4tail:				/* do remaining words if possible */
	MOVW	savedts-4(SP), R(TS)
	ADD	$3, R(TS), R(TMP)
_b4loop:
	CMP	R(TMP), R(TE)
	BLS	_b1tail

	MOVW.W	-4(R(FROM)), R(TMP1)	/* pre-indexed */
	MOVW.W	R(TMP1), -4(R(TE))	/* pre-indexed */
	B	_b4loop

_b1tail:				/* remaining bytes */
	CMP	R(TE), R(TS)
	BEQ	_return

	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed */
	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed */
	B	_b1tail

_forward:
	CMP	$4, R(N)		/* need at least 4 bytes to copy */
	BLT	_f1tail

_f4align:				/* align destination on 4 */
	AND.S	$3, R(TS), R(TMP)
	BEQ	_f4aligned

	MOVBU.P	1(R(FROM)), R(TMP)	/* implicit write back */
	MOVBU.P	R(TMP), 1(R(TS))	/* implicit write back */
	B	_f4align

_f4aligned:				/* is source now aligned? */
	AND.S	$3, R(FROM), R(TMP)
	BNE	_funaligned

	SUB	$31, R(TE), R(TMP)	/* do 32-byte chunks if possible */
	MOVW	R(TE), savedte-4(SP)	/* R8 is overwritten by the block loads below */
_f32loop:
	CMP	R(TMP), R(TS)
	BHS	_f4tail

	MOVM.IA.W (R(FROM)), [R1-R8]
	MOVM.IA.W [R1-R8], (R(TS))
	B	_f32loop

_f4tail:
	MOVW	savedte-4(SP), R(TE)
	SUB	$3, R(TE), R(TMP)	/* do remaining words if possible */
_f4loop:
	CMP	R(TMP), R(TS)
	BHS	_f1tail

	MOVW.P	4(R(FROM)), R(TMP1)	/* implicit write back */
	MOVW.P	R(TMP1), 4(R(TS))	/* implicit write back */
	B	_f4loop

_f1tail:				/* remaining bytes */
	CMP	R(TS), R(TE)
	BEQ	_return

	MOVBU.P	1(R(FROM)), R(TMP)	/* implicit write back */
	MOVBU.P	R(TMP), 1(R(TS))	/* implicit write back */
	B	_f1tail

_return:
	MOVW	to+0(FP), R0		/* return the original destination pointer */
	RET

/*
 * Backward copy, destination word aligned, source not.
 * On entry R(TMP) holds the source misalignment (1, 2 or 3); the three
 * conditional groups below pick the shift pair and the byte offset for it.
 */
_bunaligned:
	CMP	$2, R(TMP)		/* is R(TMP) < 2 ? */

	MOVW.LT	$8, R(RSHIFT)		/* (R(n)<<24)|(R(n-1)>>8) */
	MOVW.LT	$24, R(LSHIFT)
	MOVW.LT	$1, R(OFFSET)

	MOVW.EQ	$16, R(RSHIFT)		/* (R(n)<<16)|(R(n-1)>>16) */
	MOVW.EQ	$16, R(LSHIFT)
	MOVW.EQ	$2, R(OFFSET)

	MOVW.GT	$24, R(RSHIFT)		/* (R(n)<<8)|(R(n-1)>>24) */
	MOVW.GT	$8, R(LSHIFT)
	MOVW.GT	$3, R(OFFSET)

	ADD	$16, R(TS), R(TMP)	/* do 16-byte chunks if possible */
	CMP	R(TMP), R(TE)
	BLS	_b1tail

	BIC	$3, R(FROM)		/* align source */
	MOVW	R(TS), savedts-4(SP)	/* BR0 is R0, so TS is overwritten from here on */
	MOVW	(R(FROM)), R(BR0)	/* prime first block register */

_bu16loop:
	CMP	R(TMP), R(TE)
	BLS	_bu1tail

	/* each output word merges two neighbouring source words */
	MOVW	R(BR0)<<R(LSHIFT), R(BW3)
	MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)]
	ORR	R(BR3)>>R(RSHIFT), R(BW3)

	MOVW	R(BR3)<<R(LSHIFT), R(BW2)
	ORR	R(BR2)>>R(RSHIFT), R(BW2)

	MOVW	R(BR2)<<R(LSHIFT), R(BW1)
	ORR	R(BR1)>>R(RSHIFT), R(BW1)

	MOVW	R(BR1)<<R(LSHIFT), R(BW0)
	ORR	R(BR0)>>R(RSHIFT), R(BW0)

	MOVM.DB.W [R(BW0)-R(BW3)], (R(TE))
	B	_bu16loop

_bu1tail:
	MOVW	savedts-4(SP), R(TS)
	ADD	R(OFFSET), R(FROM)	/* put back the low bits cleared by BIC */
	B	_b1tail

/*
 * Forward copy, destination word aligned, source not.
 * On entry R(TMP) holds the source misalignment (1, 2 or 3).  OFFSET is
 * 4 minus that misalignment because the priming load below has already
 * advanced the aligned source pointer by one word.
 */
_funaligned:
	CMP	$2, R(TMP)

	MOVW.LT	$8, R(RSHIFT)		/* (R(n+1)<<24)|(R(n)>>8) */
	MOVW.LT	$24, R(LSHIFT)
	MOVW.LT	$3, R(OFFSET)

	MOVW.EQ	$16, R(RSHIFT)		/* (R(n+1)<<16)|(R(n)>>16) */
	MOVW.EQ	$16, R(LSHIFT)
	MOVW.EQ	$2, R(OFFSET)

	MOVW.GT	$24, R(RSHIFT)		/* (R(n+1)<<8)|(R(n)>>24) */
	MOVW.GT	$8, R(LSHIFT)
	MOVW.GT	$1, R(OFFSET)

	SUB	$16, R(TE), R(TMP)	/* do 16-byte chunks if possible */
	CMP	R(TMP), R(TS)
	BHS	_f1tail

	BIC	$3, R(FROM)		/* align source */
	MOVW	R(TE), savedte-4(SP)	/* FR3 is R8, so TE is overwritten from here on */
	MOVW.P	4(R(FROM)), R(FR3)	/* prime last block register, implicit write back */

_fu16loop:
	CMP	R(TMP), R(TS)
	BHS	_fu1tail

	/* each output word merges two neighbouring source words */
	MOVW	R(FR3)>>R(RSHIFT), R(FW0)
	MOVM.IA.W (R(FROM)), [R(FR0),R(FR1),R(FR2),R(FR3)]
	ORR	R(FR0)<<R(LSHIFT), R(FW0)

	MOVW	R(FR0)>>R(RSHIFT), R(FW1)
	ORR	R(FR1)<<R(LSHIFT), R(FW1)

	MOVW	R(FR1)>>R(RSHIFT), R(FW2)
	ORR	R(FR2)<<R(LSHIFT), R(FW2)

	MOVW	R(FR2)>>R(RSHIFT), R(FW3)
	ORR	R(FR3)<<R(LSHIFT), R(FW3)

	MOVM.IA.W [R(FW0),R(FW1),R(FW2),R(FW3)], (R(TS))
	B	_fu16loop

_fu1tail:
	MOVW	savedte-4(SP), R(TE)
	SUB	R(OFFSET), R(FROM)	/* step back to the first source byte not yet copied */
	B	_f1tail