// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// +build !plan9

#include "textflag.h"

// void runtime·memmove(void*, void*, uintptr)
//
// Copies n bytes from `from` to `to`. The size-dispatched paths below are
// arranged so that overlapping source and destination regions are handled
// (see the comment at tail).
//
// Arguments (frame $0-24: no local frame, 24 bytes of arguments):
//	to+0(FP)	destination pointer, kept in DI
//	from+8(FP)	source pointer, kept in SI
//	n+16(FP)	byte count, kept in BX (decreases as bulk copies complete)
//
// AX, CX and X0-X15 are used as scratch registers.
TEXT runtime·memmove(SB), NOSPLIT, $0-24

	MOVQ	to+0(FP), DI
	MOVQ	from+8(FP), SI
	MOVQ	n+16(FP), BX

	// REP instructions have a high startup cost, so we handle small sizes
	// with some straightline code. The REP MOVSQ instruction is really fast
	// for large sizes. The cutover is approximately 2K.
tail:
	// move_129through256 or smaller work whether or not the source and the
	// destination memory regions overlap because they load all data into
	// registers before writing it back. move_256through2048 on the other
	// hand can be used only when the memory regions don't overlap or the copy
	// direction is forward.
	//
	// This label is also the re-entry point after a bulk copy: the bulk
	// paths jump back here with SI, DI and BX describing whatever bytes are
	// still left to copy.
	TESTQ	BX, BX
	JEQ	move_0
	CMPQ	BX, $2
	JBE	move_1or2
	CMPQ	BX, $4
	JBE	move_3or4
	CMPQ	BX, $8
	JB	move_5through7
	JE	move_8
	CMPQ	BX, $16
	JBE	move_9through16
	CMPQ	BX, $32
	JBE	move_17through32
	CMPQ	BX, $64
	JBE	move_33through64
	CMPQ	BX, $128
	JBE	move_65through128
	CMPQ	BX, $256
	JBE	move_129through256
	// TODO: use branch table and BSR to make this just a single dispatch

/*
 * check and set for backwards
 */
	// Source at or below destination (unsigned compare): a forward copy
	// could overwrite source bytes before they are read, so check overlap.
	CMPQ	SI, DI
	JLS	back

/*
 * forward copy loop
 */
forward:
	// BX > 256 here. Sizes up to 2048 use the unrolled SSE loop.
	CMPQ	BX, $2048
	JLS	move_256through2048

	// Larger sizes: copy BX/8 quadwords with REP MOVSQ and leave the
	// remaining BX%8 bytes for the small-size code at tail.
	MOVQ	BX, CX
	SHRQ	$3, CX
	ANDQ	$7, BX
	REP;	MOVSQ
	JMP	tail

back:
/*
 * check overlap
 */
	// CX = from + n. If the source ends at or before the start of the
	// destination the regions do not overlap, so copy forward after all.
	MOVQ	SI, CX
	ADDQ	BX, CX
	CMPQ	CX, DI
	JLS	forward

/*
 * whole thing backwards has
 * adjusted addresses
 */
	ADDQ	BX, DI
	ADDQ	BX, SI
	STD			// make MOVSQ step downward through memory

/*
 * copy
 */
	// CX = number of whole quadwords, BX = leftover byte count (0-7).
	MOVQ	BX, CX
	SHRQ	$3, CX
	ANDQ	$7, BX

	// Step back from one-past-the-end to the last quadword of each region.
	SUBQ	$8, DI
	SUBQ	$8, SI
	REP;	MOVSQ

	// Restore the direction flag, then undo the -8 bias and back up over
	// the BX leftover bytes so SI/DI point at the start of the regions,
	// which is where the bytes not yet copied live.
	CLD
	ADDQ	$8, DI
	ADDQ	$8, SI
	SUBQ	BX, DI
	SUBQ	BX, SI
	JMP	tail

	// Each fixed-range case below loads a chunk anchored at the start of the
	// source and a chunk anchored at its end (addressed via BX), then stores
	// both. For sizes below the top of the range the two chunks overlap.
move_1or2:
	MOVB	(SI), AX
	MOVB	-1(SI)(BX*1), CX
	MOVB	AX, (DI)
	MOVB	CX, -1(DI)(BX*1)
	RET
move_0:
	RET
move_3or4:
	MOVW	(SI), AX
	MOVW	-2(SI)(BX*1), CX
	MOVW	AX, (DI)
	MOVW	CX, -2(DI)(BX*1)
	RET
move_5through7:
	MOVL	(SI), AX
	MOVL	-4(SI)(BX*1), CX
	MOVL	AX, (DI)
	MOVL	CX, -4(DI)(BX*1)
	RET
move_8:
	// We need a separate case for 8 to make sure we write pointers atomically.
	MOVQ	(SI), AX
	MOVQ	AX, (DI)
	RET
move_9through16:
	MOVQ	(SI), AX
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	AX, (DI)
	MOVQ	CX, -8(DI)(BX*1)
	RET
move_17through32:
	MOVOU	(SI), X0
	MOVOU	-16(SI)(BX*1), X1
	MOVOU	X0, (DI)
	MOVOU	X1, -16(DI)(BX*1)
	RET
move_33through64:
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	-32(SI)(BX*1), X2
	MOVOU	-16(SI)(BX*1), X3
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, -32(DI)(BX*1)
	MOVOU	X3, -16(DI)(BX*1)
	RET
move_65through128:
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	32(SI), X2
	MOVOU	48(SI), X3
	MOVOU	-64(SI)(BX*1), X4
	MOVOU	-48(SI)(BX*1), X5
	MOVOU	-32(SI)(BX*1), X6
	MOVOU	-16(SI)(BX*1), X7
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, 32(DI)
	MOVOU	X3, 48(DI)
	MOVOU	X4, -64(DI)(BX*1)
	MOVOU	X5, -48(DI)(BX*1)
	MOVOU	X6, -32(DI)(BX*1)
	MOVOU	X7, -16(DI)(BX*1)
	RET
move_129through256:
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	32(SI), X2
	MOVOU	48(SI), X3
	MOVOU	64(SI), X4
	MOVOU	80(SI), X5
	MOVOU	96(SI), X6
	MOVOU	112(SI), X7
	MOVOU	-128(SI)(BX*1), X8
	MOVOU	-112(SI)(BX*1), X9
	MOVOU	-96(SI)(BX*1), X10
	MOVOU	-80(SI)(BX*1), X11
	MOVOU	-64(SI)(BX*1), X12
	MOVOU	-48(SI)(BX*1), X13
	MOVOU	-32(SI)(BX*1), X14
	MOVOU	-16(SI)(BX*1), X15
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, 32(DI)
	MOVOU	X3, 48(DI)
	MOVOU	X4, 64(DI)
	MOVOU	X5, 80(DI)
	MOVOU	X6, 96(DI)
	MOVOU	X7, 112(DI)
	MOVOU	X8, -128(DI)(BX*1)
	MOVOU	X9, -112(DI)(BX*1)
	MOVOU	X10, -96(DI)(BX*1)
	MOVOU	X11, -80(DI)(BX*1)
	MOVOU	X12, -64(DI)(BX*1)
	MOVOU	X13, -48(DI)(BX*1)
	MOVOU	X14, -32(DI)(BX*1)
	MOVOU	X15, -16(DI)(BX*1)
	RET
move_256through2048:
	// Forward loop copying 256 bytes per iteration through X0-X15.
	// BX is decremented up front, so after the stores it already holds the
	// number of bytes that remain beyond this iteration.
	SUBQ	$256, BX
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	32(SI), X2
	MOVOU	48(SI), X3
	MOVOU	64(SI), X4
	MOVOU	80(SI), X5
	MOVOU	96(SI), X6
	MOVOU	112(SI), X7
	MOVOU	128(SI), X8
	MOVOU	144(SI), X9
	MOVOU	160(SI), X10
	MOVOU	176(SI), X11
	MOVOU	192(SI), X12
	MOVOU	208(SI), X13
	MOVOU	224(SI), X14
	MOVOU	240(SI), X15
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, 32(DI)
	MOVOU	X3, 48(DI)
	MOVOU	X4, 64(DI)
	MOVOU	X5, 80(DI)
	MOVOU	X6, 96(DI)
	MOVOU	X7, 112(DI)
	MOVOU	X8, 128(DI)
	MOVOU	X9, 144(DI)
	MOVOU	X10, 160(DI)
	MOVOU	X11, 176(DI)
	MOVOU	X12, 192(DI)
	MOVOU	X13, 208(DI)
	MOVOU	X14, 224(DI)
	MOVOU	X15, 240(DI)
	// The pointers are advanced with LEAQ, which leaves the flags alone, so
	// the JGE below still tests the result of this CMPQ.
	CMPQ	BX, $256
	LEAQ	256(SI), SI
	LEAQ	256(DI), DI
	JGE	move_256through2048
	// Fewer than 256 bytes remain; finish with the small-size code.
	JMP	tail