// Origin: github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/runtime/memmove_386.s
//
// Inferno's libkern/memmove-386.s
// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-386.s
//
//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
//         Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// +build !plan9

#include "go_asm.h"
#include "textflag.h"

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// memmove copies n bytes from `from` to `to`. The two regions may overlap:
// small sizes are fully loaded into registers before anything is stored, and
// large overlapping copies whose source lies at or below the destination are
// performed back to front.
//
// Register roles throughout:
//	DI = destination pointer
//	SI = source pointer
//	BX = number of bytes still to copy
//	CX = repeat count for the REP string instructions
//
// The frame is $0-12: no locals, 12 bytes of arguments (three 4-byte words).
TEXT runtime·memmove(SB), NOSPLIT, $0-12
	MOVL	to+0(FP), DI
	MOVL	from+4(FP), SI
	MOVL	n+8(FP), BX

	// REP instructions have a high startup cost, so we handle small sizes
	// with some straightline code. The REP MOVSL instruction is really fast
	// for large sizes. The cutover is approximately 1K. We implement up to
	// 128 because that is the maximum SSE register load (loading all data
	// into registers lets us ignore copy direction).
tail:
	// Dispatch on the remaining byte count in BX (unsigned comparisons).
	// This label is also re-entered after the bulk REP MOVSL copies below,
	// at which point BX holds only the 0..3 leftover bytes.
	//
	// BSR+branch table make almost all memmove/memclr benchmarks worse. Not worth doing.
	TESTL	BX, BX
	JEQ	move_0
	CMPL	BX, $2
	JBE	move_1or2
	CMPL	BX, $4
	JB	move_3
	JE	move_4
	CMPL	BX, $8
	JBE	move_5through8
	CMPL	BX, $16
	JBE	move_9through16
	// Sizes above 16 use the X registers, so they are only taken when the
	// CPU feature byte reports SSE2.
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	nosse2
	CMPL	BX, $32
	JBE	move_17through32
	CMPL	BX, $64
	JBE	move_33through64
	CMPL	BX, $128
	JBE	move_65through128

	// Reached when n > 128, or when n > 16 and SSE2 is unavailable.
nosse2:
/*
 * check and set for backwards
 */
	// from <= to (unsigned): a forward copy could overwrite source bytes
	// that have not been read yet, so examine the overlap first.
	CMPL	SI, DI
	JLS	back

/*
 * forward copy loop
 */
forward:
	// If REP MOVSB isn't fast, don't use it
	CMPB	internal∕cpu·X86+const_offsetX86HasERMS(SB), $1 // enhanced REP MOVSB/STOSB
	JNE	fwdBy4

	// Check alignment: the low two bits of (SI | DI) are zero only when
	// both pointers are 4-byte aligned, in which case REP MOVSL is used.
	MOVL	SI, AX
	ORL	DI, AX
	TESTL	$3, AX
	JEQ	fwdBy4

	// Do 1 byte at a time: REP MOVSB copies all BX bytes, nothing is left
	// over for tail.
	MOVL	BX, CX
	REP;	MOVSB
	RET

fwdBy4:
	// Do 4 bytes at a time: CX = n/4 dwords for REP MOVSL, BX = n%4 bytes
	// left for tail. REP MOVSL advances SI and DI past the copied dwords.
	MOVL	BX, CX
	SHRL	$2, CX
	ANDL	$3, BX
	REP;	MOVSL
	JMP	tail

/*
 * check overlap
 */
back:
	// CX = from + n. If that is <= to, the source ends before the
	// destination begins and the forward copy is safe.
	MOVL	SI, CX
	ADDL	BX, CX
	CMPL	CX, DI
	JLS	forward
/*
 * whole thing backwards has
 * adjusted addresses
 */

	// Point both registers one past the end of their regions and set the
	// direction flag so the string instruction walks downwards.
	ADDL	BX, DI
	ADDL	BX, SI
	STD

/*
 * copy
 */
	// CX = n/4 dwords, BX = n%4 leftover bytes.
	MOVL	BX, CX
	SHRL	$2, CX
	ANDL	$3, BX

	// Step back to the last dword of each region before copying.
	SUBL	$4, DI
	SUBL	$4, SI
	REP;	MOVSL

	// Restore the forward direction, then undo the -4 bias and back up by
	// the leftover count so SI/DI address the first BX bytes of the
	// regions, which are the only bytes not yet copied.
	CLD
	ADDL	$4, DI
	ADDL	$4, SI
	SUBL	BX, DI
	SUBL	BX, SI
	JMP	tail

	// In every move_* case below, all loads are issued before any store, so
	// the result is correct even when source and destination overlap. The
	// ranged cases copy a prefix and a suffix addressed from the end
	// (offset + BX); the two simply overlap when n is below the range maximum.
move_1or2:
	// First byte and last byte; for n == 1 these are the same byte.
	MOVB	(SI), AX
	MOVB	-1(SI)(BX*1), CX
	MOVB	AX, (DI)
	MOVB	CX, -1(DI)(BX*1)
	RET
move_0:
	RET
move_3:
	// One 2-byte word plus the third byte.
	MOVW	(SI), AX
	MOVB	2(SI), CX
	MOVW	AX, (DI)
	MOVB	CX, 2(DI)
	RET
move_4:
	// We need a separate case for 4 to make sure we write pointers atomically.
	MOVL	(SI), AX
	MOVL	AX, (DI)
	RET
move_5through8:
	// First dword and last dword.
	MOVL	(SI), AX
	MOVL	-4(SI)(BX*1), CX
	MOVL	AX, (DI)
	MOVL	CX, -4(DI)(BX*1)
	RET
move_9through16:
	// First two dwords and last two dwords.
	MOVL	(SI), AX
	MOVL	4(SI), CX
	MOVL	-8(SI)(BX*1), DX
	MOVL	-4(SI)(BX*1), BP
	MOVL	AX, (DI)
	MOVL	CX, 4(DI)
	MOVL	DX, -8(DI)(BX*1)
	MOVL	BP, -4(DI)(BX*1)
	RET
move_17through32:
	// First 16 bytes and last 16 bytes, unaligned 128-bit moves.
	MOVOU	(SI), X0
	MOVOU	-16(SI)(BX*1), X1
	MOVOU	X0, (DI)
	MOVOU	X1, -16(DI)(BX*1)
	RET
move_33through64:
	// First 32 bytes and last 32 bytes.
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	-32(SI)(BX*1), X2
	MOVOU	-16(SI)(BX*1), X3
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, -32(DI)(BX*1)
	MOVOU	X3, -16(DI)(BX*1)
	RET
move_65through128:
	// First 64 bytes and last 64 bytes, using all eight X registers.
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	32(SI), X2
	MOVOU	48(SI), X3
	MOVOU	-64(SI)(BX*1), X4
	MOVOU	-48(SI)(BX*1), X5
	MOVOU	-32(SI)(BX*1), X6
	MOVOU	-16(SI)(BX*1), X7
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, 32(DI)
	MOVOU	X3, 48(DI)
	MOVOU	X4, -64(DI)(BX*1)
	MOVOU	X5, -48(DI)(BX*1)
	MOVOU	X6, -32(DI)(BX*1)
	MOVOU	X7, -16(DI)(BX*1)
	RET