// Origin: github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/memmove_amd64.s
//
// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
//
//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
//         Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "../../cmd/ld/textflag.h"

// void runtime·memmove(void*, void*, uintptr)
//
// Copies n bytes from fr to to. The two regions may overlap.
//
// Arguments (three 8-byte slots, 24 bytes in total, no local frame):
//	to+0(FP)	destination pointer
//	fr+8(FP)	source pointer
//	n+16(FP)	byte count
//
// Register roles for the whole routine:
//	DI	destination cursor
//	SI	source cursor
//	BX	bytes still to copy
//	CX	quadword count for REP MOVSQ, or a scratch value
//	AX, X0-X15	staging registers for the straight-line small copies
TEXT runtime·memmove(SB), NOSPLIT, $0-24

	MOVQ	to+0(FP), DI
	MOVQ	fr+8(FP), SI
	MOVQ	n+16(FP), BX

	// REP instructions have a high startup cost, so we handle small sizes
	// with some straightline code.  The REP MOVSQ instruction is really fast
	// for large sizes.  The cutover is approximately 1K.  We implement up to
	// 256 because that is the maximum SSE register load (loading all data
	// into registers lets us ignore copy direction).
	//
	// tail is both the entry dispatch and the clean-up step: the REP MOVSQ
	// paths below jump back here with BX reduced to n&7 and SI/DI pointing
	// at the bytes that were not covered by whole quadwords.
tail:
	TESTQ	BX, BX
	JEQ	move_0
	CMPQ	BX, $2
	JBE	move_1or2
	CMPQ	BX, $4
	JBE	move_3or4
	CMPQ	BX, $8
	JBE	move_5through8
	CMPQ	BX, $16
	JBE	move_9through16
	CMPQ	BX, $32
	JBE	move_17through32
	CMPQ	BX, $64
	JBE	move_33through64
	CMPQ	BX, $128
	JBE	move_65through128
	CMPQ	BX, $256
	JBE	move_129through256
	// TODO: use branch table and BSR to make this just a single dispatch

/*
 * More than 256 bytes: choose a copy direction.
 * If the source is at or below the destination (unsigned compare),
 * a forward copy could overwrite source bytes before they are read,
 * so go and check for overlap.
 */
	CMPQ	SI, DI
	JLS	back

/*
 * forward copy loop
 * Copy n/8 quadwords with REP MOVSQ, then let tail copy the last n%8 bytes.
 * NOTE(review): relies on the direction flag being clear on entry;
 * presumably guaranteed by the calling convention - confirm.
 */
forward:
	MOVQ	BX, CX
	SHRQ	$3, CX		// CX = number of whole quadwords
	ANDQ	$7, BX		// BX = leftover byte count for tail

	REP;	MOVSQ
	JMP	tail

back:
/*
 * check overlap
 * CX = end of source. If the source ends at or before the destination
 * starts, the regions do not overlap and the forward copy is safe.
 */
	MOVQ	SI, CX
	ADDQ	BX, CX
	CMPQ	CX, DI
	JLS	forward

/*
 * whole thing backwards has
 * adjusted addresses
 * Point both cursors one past the end of their regions and set the
 * direction flag so that MOVSQ decrements SI and DI.
 */
	ADDQ	BX, DI
	ADDQ	BX, SI
	STD

/*
 * copy
 */
	MOVQ	BX, CX
	SHRQ	$3, CX		// CX = number of whole quadwords
	ANDQ	$7, BX		// BX = leftover byte count for tail

	SUBQ	$8, DI		// step back onto the last quadword of each region
	SUBQ	$8, SI
	REP;	MOVSQ

	// Restore the direction flag, then rewind the cursors to the start of
	// the regions: +8 undoes the bias applied before the REP, -BX steps
	// over the leftover bytes, which sit at the low end of the buffers.
	CLD
	ADDQ	$8, DI
	ADDQ	$8, SI
	SUBQ	BX, DI
	SUBQ	BX, SI
	JMP	tail

// Straight-line copies for 1..256 bytes.
//
// Each case loads a block anchored at the start of the source and a block
// anchored at its end (offset -k(SI)(BX*1)); together they cover every byte
// of the range, overlapping in the middle when n is below the case's upper
// bound. All loads are issued before the first store, so the result is
// correct even when source and destination overlap.

move_1or2:
	MOVB	(SI), AX
	MOVB	-1(SI)(BX*1), CX
	MOVB	AX, (DI)
	MOVB	CX, -1(DI)(BX*1)
	// n == 0 lands here directly; the 1-or-2 byte case falls through.
move_0:
	RET
move_3or4:
	MOVW	(SI), AX
	MOVW	-2(SI)(BX*1), CX
	MOVW	AX, (DI)
	MOVW	CX, -2(DI)(BX*1)
	RET
move_5through8:
	MOVL	(SI), AX
	MOVL	-4(SI)(BX*1), CX
	MOVL	AX, (DI)
	MOVL	CX, -4(DI)(BX*1)
	RET
move_9through16:
	MOVQ	(SI), AX
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	AX, (DI)
	MOVQ	CX, -8(DI)(BX*1)
	RET
move_17through32:
	MOVOU	(SI), X0
	MOVOU	-16(SI)(BX*1), X1
	MOVOU	X0, (DI)
	MOVOU	X1, -16(DI)(BX*1)
	RET
move_33through64:
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	-32(SI)(BX*1), X2
	MOVOU	-16(SI)(BX*1), X3
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, -32(DI)(BX*1)
	MOVOU	X3, -16(DI)(BX*1)
	RET
move_65through128:
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	32(SI), X2
	MOVOU	48(SI), X3
	MOVOU	-64(SI)(BX*1), X4
	MOVOU	-48(SI)(BX*1), X5
	MOVOU	-32(SI)(BX*1), X6
	MOVOU	-16(SI)(BX*1), X7
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, 32(DI)
	MOVOU	X3, 48(DI)
	MOVOU	X4, -64(DI)(BX*1)
	MOVOU	X5, -48(DI)(BX*1)
	MOVOU	X6, -32(DI)(BX*1)
	MOVOU	X7, -16(DI)(BX*1)
	RET
move_129through256:
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	32(SI), X2
	MOVOU	48(SI), X3
	MOVOU	64(SI), X4
	MOVOU	80(SI), X5
	MOVOU	96(SI), X6
	MOVOU	112(SI), X7
	MOVOU	-128(SI)(BX*1), X8
	MOVOU	-112(SI)(BX*1), X9
	MOVOU	-96(SI)(BX*1), X10
	MOVOU	-80(SI)(BX*1), X11
	MOVOU	-64(SI)(BX*1), X12
	MOVOU	-48(SI)(BX*1), X13
	MOVOU	-32(SI)(BX*1), X14
	MOVOU	-16(SI)(BX*1), X15
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, 32(DI)
	MOVOU	X3, 48(DI)
	MOVOU	X4, 64(DI)
	MOVOU	X5, 80(DI)
	MOVOU	X6, 96(DI)
	MOVOU	X7, 112(DI)
	MOVOU	X8, -128(DI)(BX*1)
	MOVOU	X9, -112(DI)(BX*1)
	MOVOU	X10, -96(DI)(BX*1)
	MOVOU	X11, -80(DI)(BX*1)
	MOVOU	X12, -64(DI)(BX*1)
	MOVOU	X13, -48(DI)(BX*1)
	MOVOU	X14, -32(DI)(BX*1)
	MOVOU	X15, -16(DI)(BX*1)
	RET