// Inferno's libkern/memmove-386.s
// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "../../cmd/ld/textflag.h"

// memmove(to, fr, n): copy n bytes from fr to to, correct even when the
// two regions overlap. Strategy: sizes <= 16 (or <= 128 when SSE2 is
// available) are copied by loading the whole payload into registers
// before any store, which makes copy direction irrelevant; larger sizes
// fall through to REP MOVSL, copying backwards (direction flag set) only
// when the regions actually overlap with fr < to.
TEXT runtime·memmove(SB), NOSPLIT, $0-12
	MOVL	to+0(FP), DI	// DI = destination
	MOVL	fr+4(FP), SI	// SI = source
	MOVL	n+8(FP), BX	// BX = byte count

	// REP instructions have a high startup cost, so we handle small sizes
	// with some straightline code. The REP MOVSL instruction is really fast
	// for large sizes. The cutover is approximately 1K. We implement up to
	// 128 because that is the maximum SSE register load (loading all data
	// into registers lets us ignore copy direction).
tail:
	// Size dispatch. Also re-entered after the REP loops below with
	// BX = 0..3 leftover bytes still to copy.
	TESTL	BX, BX
	JEQ	move_0
	CMPL	BX, $2
	JBE	move_1or2
	CMPL	BX, $4
	JBE	move_3or4
	CMPL	BX, $8
	JBE	move_5through8
	CMPL	BX, $16
	JBE	move_9through16
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JEQ	nosse2
	CMPL	BX, $32
	JBE	move_17through32
	CMPL	BX, $64
	JBE	move_33through64
	CMPL	BX, $128
	JBE	move_65through128
	// TODO: use branch table and BSR to make this just a single dispatch

nosse2:
/*
 * check and set for backwards
 */
	CMPL	SI, DI
	JLS	back		// unsigned src <= dst: regions may overlap

/*
 * forward copy loop
 */
forward:
	MOVL	BX, CX
	SHRL	$2, CX		// CX = count of whole 4-byte words
	ANDL	$3, BX		// BX = 0..3 leftover bytes for tail

	REP;	MOVSL		// copy CX words forward
	JMP	tail
/*
 * check overlap
 */
back:
	MOVL	SI, CX
	ADDL	BX, CX
	CMPL	CX, DI		// src+n <= dst: regions are disjoint,
	JLS	forward		// forward copy is still safe
/*
 * whole thing backwards has
 * adjusted addresses
 */

	ADDL	BX, DI		// point both registers one past the end
	ADDL	BX, SI
	STD			// direction flag set: MOVSL decrements SI/DI

/*
 * copy
 */
	MOVL	BX, CX
	SHRL	$2, CX		// CX = whole words, BX = 0..3 leftovers
	ANDL	$3, BX

	SUBL	$4, DI		// step back onto the last whole word
	SUBL	$4, SI
	REP;	MOVSL		// copy CX words backwards

	CLD			// restore direction flag (callers expect DF clear)
	ADDL	$4, DI		// undo the word-alignment adjustment, then
	ADDL	$4, SI
	SUBL	BX, DI		// rewind to the start of the leftover bytes;
	SUBL	BX, SI		// tail copies them register-wise, direction-free
	JMP	tail

move_1or2:
	// 1-2 bytes: both bytes are loaded before either store, so
	// overlapping regions are handled (for BX=1 the two moves coincide).
	MOVB	(SI), AX
	MOVB	-1(SI)(BX*1), CX
	MOVB	AX, (DI)
	MOVB	CX, -1(DI)(BX*1)
move_0:
	RET
move_3or4:
	// 3-4 bytes: two possibly-overlapping 2-byte moves, loads first.
	MOVW	(SI), AX
	MOVW	-2(SI)(BX*1), CX
	MOVW	AX, (DI)
	MOVW	CX, -2(DI)(BX*1)
	RET
move_5through8:
	// 5-8 bytes: two possibly-overlapping 4-byte moves, loads first.
	MOVL	(SI), AX
	MOVL	-4(SI)(BX*1), CX
	MOVL	AX, (DI)
	MOVL	CX, -4(DI)(BX*1)
	RET
move_9through16:
	// 9-16 bytes: four 4-byte loads complete before any store.
	MOVL	(SI), AX
	MOVL	4(SI), CX
	MOVL	-8(SI)(BX*1), DX
	MOVL	-4(SI)(BX*1), BP
	MOVL	AX, (DI)
	MOVL	CX, 4(DI)
	MOVL	DX, -8(DI)(BX*1)
	MOVL	BP, -4(DI)(BX*1)
	RET
move_17through32:
	// 17-32 bytes via unaligned 16-byte SSE loads/stores
	// (SSE2 presence was checked before branching here).
	MOVOU	(SI), X0
	MOVOU	-16(SI)(BX*1), X1
	MOVOU	X0, (DI)
	MOVOU	X1, -16(DI)(BX*1)
	RET
move_33through64:
	// 33-64 bytes: four 16-byte loads, then four stores.
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	-32(SI)(BX*1), X2
	MOVOU	-16(SI)(BX*1), X3
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, -32(DI)(BX*1)
	MOVOU	X3, -16(DI)(BX*1)
	RET
move_65through128:
	// 65-128 bytes: the full payload fits in X0-X7, loaded before
	// any store, so copy direction never matters at this size.
	MOVOU	(SI), X0
	MOVOU	16(SI), X1
	MOVOU	32(SI), X2
	MOVOU	48(SI), X3
	MOVOU	-64(SI)(BX*1), X4
	MOVOU	-48(SI)(BX*1), X5
	MOVOU	-32(SI)(BX*1), X6
	MOVOU	-16(SI)(BX*1), X7
	MOVOU	X0, (DI)
	MOVOU	X1, 16(DI)
	MOVOU	X2, 32(DI)
	MOVOU	X3, 48(DI)
	MOVOU	X4, -64(DI)(BX*1)
	MOVOU	X5, -48(DI)(BX*1)
	MOVOU	X6, -32(DI)(BX*1)
	MOVOU	X7, -16(DI)(BX*1)
	RET