// Inferno's libkern/memmove-arm.s
// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-arm.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "textflag.h"

// Register aliases
// ----------------
// DST     (R0)  current start of the not-yet-copied destination range
// DSTEND  (R8)  current end (exclusive) of the not-yet-copied destination range
//
// The bulk multi-register moves overwrite one of these two (R0 going
// backwards, R8 going forwards), so that pointer is parked in the single
// 4-byte stack slot for the duration of the bulk loop and reloaded afterwards.
#define DST	R0
#define DSTEND	R8

// Warning: the linker will use R11 to synthesize certain instructions. Please
// take care and double check with objdump.
#define SRC	R11

// NBYTES and SCRATCH deliberately share R12: the byte count is only read
// before the first write to SCRATCH on either path.
#define NBYTES	R12
#define SCRATCH	R12

// WORDTMP carries one word in the aligned word-at-a-time loops. It shares R5
// with RSHIFT, which is only used on the unaligned-source paths.
#define WORDTMP	R5

// Unaligned-source paths: shift amounts used to stitch two neighbouring source
// words into one destination word, and the byte correction applied to SRC when
// leaving the shifting loop.
#define RSHIFT	R5
#define LSHIFT	R6
#define OFFSET	R7

// Backward unaligned copy: BRn are the source words as read, BWn the words as
// written. BR0 is R0 and therefore shared with DST.
#define BR0	R0
#define BW0	R1
#define BR1	R1
#define BW1	R2
#define BR2	R2
#define BW2	R3
#define BR3	R3
#define BW3	R4

// Forward unaligned copy: FRn are the source words as read, FWn the words as
// written. FR3 is R8 and therefore shared with DSTEND.
#define FW0	R1
#define FR0	R2
#define FW1	R2
#define FR1	R3
#define FW2	R3
#define FR2	R4
#define FW3	R4
#define FR3	R8

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from `from` to `to`. The copy direction is picked by an
// unsigned comparison of the two pointers: to <= from copies upwards from the
// start, to > from copies downwards from the end.
//
// Each direction follows the same plan:
//   1. fewer than 4 bytes: byte loop only;
//   2. byte-copy until the destination pointer is 4-byte aligned;
//   3. source also 4-byte aligned: 32-byte multi-register chunks, then single
//      words, then trailing bytes;
//   4. source not aligned: 16-byte chunks assembled from aligned source words
//      with shifts, then trailing bytes.
//
// Frame: 4 bytes of locals (the DST/DSTEND spill slot), 12 bytes of arguments.
TEXT runtime·memmove(SB), NOSPLIT, $4-12
	MOVW	to+0(FP), DST
	MOVW	from+4(FP), SRC
	MOVW	n+8(FP), NBYTES

	ADD	NBYTES, DST, DSTEND		// DSTEND = to + n

	CMP	SRC, DST
	BLS	forward				// to <= from: copy upwards

// ---------------------------------------------------------------------------
// Backward copy: SRC and DSTEND both walk downwards from the buffer ends.
// ---------------------------------------------------------------------------
backward:
	ADD	NBYTES, SRC			// SRC = from + n
	CMP	$4, NBYTES			// fewer than 4 bytes: bytes only
	BLT	back_bytes

back_align_dst:					// byte-copy until DSTEND is a multiple of 4
	AND.S	$3, DSTEND, SCRATCH
	BEQ	back_dst_aligned

	MOVBU.W	-1(SRC), SCRATCH		// pre-decrement, then load
	MOVBU.W	SCRATCH, -1(DSTEND)		// pre-decrement, then store
	B	back_align_dst

back_dst_aligned:				// destination aligned; check the source
	AND.S	$3, SRC, SCRATCH		// SCRATCH = SRC & 3, consumed by the unaligned path
	BNE	back_src_unaligned

	ADD	$31, DST, SCRATCH		// loop limit: run while DSTEND > DST+31
	MOVW	DST, saveddst-4(SP)		// R0 is overwritten by the block move below
back_copy32:
	CMP	SCRATCH, DSTEND
	BLS	back_words

	MOVM.DB.W (SRC), [R0-R7]		// load 8 words, SRC -= 32
	MOVM.DB.W [R0-R7], (DSTEND)		// store 8 words, DSTEND -= 32
	B	back_copy32

back_words:					// whole words left over from the 32-byte loop
	MOVW	saveddst-4(SP), DST
	ADD	$3, DST, SCRATCH		// loop limit: run while DSTEND > DST+3
back_copy4:
	CMP	SCRATCH, DSTEND
	BLS	back_bytes

	MOVW.W	-4(SRC), WORDTMP		// pre-decrement, then load
	MOVW.W	WORDTMP, -4(DSTEND)		// pre-decrement, then store
	B	back_copy4

back_bytes:					// trailing bytes, until DSTEND meets DST
	CMP	DSTEND, DST
	BEQ	done

	MOVBU.W	-1(SRC), SCRATCH		// pre-decrement, then load
	MOVBU.W	SCRATCH, -1(DSTEND)		// pre-decrement, then store
	B	back_bytes

// ---------------------------------------------------------------------------
// Forward copy: SRC and DST both walk upwards from the buffer starts.
// ---------------------------------------------------------------------------
forward:
	CMP	$4, NBYTES			// fewer than 4 bytes: bytes only
	BLT	fwd_bytes

fwd_align_dst:					// byte-copy until DST is a multiple of 4
	AND.S	$3, DST, SCRATCH
	BEQ	fwd_dst_aligned

	MOVBU.P	1(SRC), SCRATCH			// load, then post-increment
	MOVBU.P	SCRATCH, 1(DST)			// store, then post-increment
	B	fwd_align_dst

fwd_dst_aligned:				// destination aligned; check the source
	AND.S	$3, SRC, SCRATCH		// SCRATCH = SRC & 3, consumed by the unaligned path
	BNE	fwd_src_unaligned

	SUB	$31, DSTEND, SCRATCH		// loop limit: run while DST < DSTEND-31
	MOVW	DSTEND, saveddstend-4(SP)	// R8 is overwritten by the block move below
fwd_copy32:
	CMP	SCRATCH, DST
	BHS	fwd_words

	MOVM.IA.W (SRC), [R1-R8]		// load 8 words, SRC += 32
	MOVM.IA.W [R1-R8], (DST)		// store 8 words, DST += 32
	B	fwd_copy32

fwd_words:					// whole words left over from the 32-byte loop
	MOVW	saveddstend-4(SP), DSTEND
	SUB	$3, DSTEND, SCRATCH		// loop limit: run while DST < DSTEND-3
fwd_copy4:
	CMP	SCRATCH, DST
	BHS	fwd_bytes

	MOVW.P	4(SRC), WORDTMP			// load, then post-increment
	MOVW.P	WORDTMP, 4(DST)			// store, then post-increment
	B	fwd_copy4

fwd_bytes:					// trailing bytes, until DST meets DSTEND
	CMP	DST, DSTEND
	BEQ	done

	MOVBU.P	1(SRC), SCRATCH			// load, then post-increment
	MOVBU.P	SCRATCH, 1(DST)			// store, then post-increment
	B	fwd_bytes

done:
	MOVW	to+0(FP), R0			// leave the original destination pointer in R0
	RET

// ---------------------------------------------------------------------------
// Backward copy, destination aligned but source not.
// On entry SCRATCH = SRC & 3, which is 1, 2 or 3.
// ---------------------------------------------------------------------------
back_src_unaligned:
	CMP	$2, SCRATCH			// LT: SCRATCH == 1, EQ: == 2, GT: == 3

	MOVW.LT	$8, RSHIFT			// (R(n)<<24)|(R(n-1)>>8)
	MOVW.LT	$24, LSHIFT
	MOVW.LT	$1, OFFSET

	MOVW.EQ	$16, RSHIFT			// (R(n)<<16)|(R(n-1)>>16)
	MOVW.EQ	$16, LSHIFT
	MOVW.EQ	$2, OFFSET

	MOVW.GT	$24, RSHIFT			// (R(n)<<8)|(R(n-1)>>24)
	MOVW.GT	$8, LSHIFT
	MOVW.GT	$3, OFFSET

	ADD	$16, DST, SCRATCH		// loop limit: run while DSTEND > DST+16
	CMP	SCRATCH, DSTEND
	BLS	back_bytes			// too short for a 16-byte chunk

	BIC	$3, SRC				// round SRC down to a word boundary
	MOVW	DST, saveddst-4(SP)		// R0 doubles as BR0 inside the loop
	MOVW	(SRC), BR0			// prime BR0 with the word at the rounded-down SRC

back_shift16:
	CMP	SCRATCH, DSTEND
	BLS	back_shift_done

	MOVW	BR0<<LSHIFT, BW3		// start the top output word from the previous BR0
	MOVM.DB.W (SRC), [BR0-BR3]		// load 4 words, SRC -= 16
	ORR	BR3>>RSHIFT, BW3

	MOVW	BR3<<LSHIFT, BW2
	ORR	BR2>>RSHIFT, BW2

	MOVW	BR2<<LSHIFT, BW1
	ORR	BR1>>RSHIFT, BW1

	MOVW	BR1<<LSHIFT, BW0
	ORR	BR0>>RSHIFT, BW0		// BR0 stays live as the primed word for the next pass

	MOVM.DB.W [BW0-BW3], (DSTEND)		// store 4 words, DSTEND -= 16
	B	back_shift16

back_shift_done:
	MOVW	saveddst-4(SP), DST
	ADD	OFFSET, SRC			// put back the low bits cleared by BIC
	B	back_bytes

// ---------------------------------------------------------------------------
// Forward copy, destination aligned but source not.
// On entry SCRATCH = SRC & 3, which is 1, 2 or 3.
// ---------------------------------------------------------------------------
fwd_src_unaligned:
	CMP	$2, SCRATCH			// LT: SCRATCH == 1, EQ: == 2, GT: == 3

	MOVW.LT	$8, RSHIFT			// (R(n+1)<<24)|(R(n)>>8)
	MOVW.LT	$24, LSHIFT
	MOVW.LT	$3, OFFSET

	MOVW.EQ	$16, RSHIFT			// (R(n+1)<<16)|(R(n)>>16)
	MOVW.EQ	$16, LSHIFT
	MOVW.EQ	$2, OFFSET

	MOVW.GT	$24, RSHIFT			// (R(n+1)<<8)|(R(n)>>24)
	MOVW.GT	$8, LSHIFT
	MOVW.GT	$1, OFFSET

	SUB	$16, DSTEND, SCRATCH		// loop limit: run while DST < DSTEND-16
	CMP	SCRATCH, DST
	BHS	fwd_bytes			// too short for a 16-byte chunk

	BIC	$3, SRC				// round SRC down to a word boundary
	MOVW	DSTEND, saveddstend-4(SP)	// R8 doubles as FR3 inside the loop
	MOVW.P	4(SRC), FR3			// prime FR3 with the first word, SRC += 4

fwd_shift16:
	CMP	SCRATCH, DST
	BHS	fwd_shift_done

	MOVW	FR3>>RSHIFT, FW0		// start the bottom output word from the previous FR3
	MOVM.IA.W (SRC), [FR0,FR1,FR2,FR3]	// load 4 words, SRC += 16
	ORR	FR0<<LSHIFT, FW0

	MOVW	FR0>>RSHIFT, FW1
	ORR	FR1<<LSHIFT, FW1

	MOVW	FR1>>RSHIFT, FW2
	ORR	FR2<<LSHIFT, FW2

	MOVW	FR2>>RSHIFT, FW3
	ORR	FR3<<LSHIFT, FW3		// FR3 stays live as the primed word for the next pass

	MOVM.IA.W [FW0,FW1,FW2,FW3], (DST)	// store 4 words, DST += 16
	B	fwd_shift16

fwd_shift_done:
	MOVW	saveddstend-4(SP), DSTEND
	SUB	OFFSET, SRC			// step back from the pre-read word to the next uncopied byte
	B	fwd_bytes