github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/runtime/mkduff.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 // runtime·duffzero is a Duff's device for zeroing memory. 8 // The compiler jumps to computed addresses within 9 // the routine to zero chunks of memory. 10 // Do not change duffzero without also 11 // changing the uses in cmd/compile/internal/*/*.go. 12 13 // runtime·duffcopy is a Duff's device for copying memory. 14 // The compiler jumps to computed addresses within 15 // the routine to copy chunks of memory. 16 // Source and destination must not overlap. 17 // Do not change duffcopy without also 18 // changing the uses in cmd/compile/internal/*/*.go. 19 20 // See the zero* and copy* generators below 21 // for architecture-specific comments. 22 23 // mkduff generates duff_*.s. 24 package main 25 26 import ( 27 "bytes" 28 "fmt" 29 "io" 30 "log" 31 "os" 32 ) 33 34 func main() { 35 gen("amd64", notags, zeroAMD64, copyAMD64) 36 gen("386", notags, zero386, copy386) 37 gen("arm", notags, zeroARM, copyARM) 38 gen("arm64", notags, zeroARM64, copyARM64) 39 gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x) 40 gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x) 41 gen("riscv64", notags, zeroRISCV64, copyRISCV64) 42 } 43 44 func gen(arch string, tags, zero, copy func(io.Writer)) { 45 var buf bytes.Buffer 46 47 fmt.Fprintln(&buf, "// Code generated by mkduff.go; DO NOT EDIT.") 48 fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.") 49 fmt.Fprintln(&buf, "// See mkduff.go for comments.") 50 tags(&buf) 51 fmt.Fprintln(&buf, "#include \"textflag.h\"") 52 fmt.Fprintln(&buf) 53 zero(&buf) 54 fmt.Fprintln(&buf) 55 copy(&buf) 56 57 if err := os.WriteFile("duff_"+arch+".s", buf.Bytes(), 0644); err != nil { 58 log.Fatalln(err) 59 } 60 } 61 62 func notags(w io.Writer) { fmt.Fprintln(w) } 63 64 
// zeroAMD64 emits runtime·duffzero for amd64: 16 repetitions of four
// 16-byte MOVUPS stores followed by a 64-byte advance of DI.
func zeroAMD64(w io.Writer) {
	// X0: zero
	// DI: ptr to memory to be zeroed
	// DI is updated as a side effect.
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for i := 0; i < 16; i++ {
		fmt.Fprintln(w, "\tMOVUPS\tX0,(DI)")
		fmt.Fprintln(w, "\tMOVUPS\tX0,16(DI)")
		fmt.Fprintln(w, "\tMOVUPS\tX0,32(DI)")
		fmt.Fprintln(w, "\tMOVUPS\tX0,48(DI)")
		fmt.Fprintln(w, "\tLEAQ\t64(DI),DI") // We use lea instead of add, to avoid clobbering flags
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// copyAMD64 emits runtime·duffcopy for amd64: 64 repetitions of a 16-byte
// load through X0 and store, advancing SI and DI by 16 each time.
func copyAMD64(w io.Writer) {
	// SI: ptr to source memory
	// DI: ptr to destination memory
	// SI and DI are updated as a side effect.
	//
	// This is equivalent to a sequence of MOVSQ but
	// for some reason that is 3.5x slower than this code.
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for i := 0; i < 64; i++ {
		fmt.Fprintln(w, "\tMOVUPS\t(SI), X0")
		fmt.Fprintln(w, "\tADDQ\t$16, SI")
		fmt.Fprintln(w, "\tMOVUPS\tX0, (DI)")
		fmt.Fprintln(w, "\tADDQ\t$16, DI")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// zero386 emits runtime·duffzero for 386 as 128 consecutive STOSL
// instructions followed by RET.
func zero386(w io.Writer) {
	// AX: zero
	// DI: ptr to memory to be zeroed
	// DI is updated as a side effect.
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tSTOSL")
	}
	fmt.Fprintln(w, "\tRET")
}

// copy386 emits runtime·duffcopy for 386: 128 repetitions of a 4-byte
// load into CX and store, advancing SI and DI by 4 each time.
func copy386(w io.Writer) {
	// SI: ptr to source memory
	// DI: ptr to destination memory
	// SI and DI are updated as a side effect.
	//
	// This is equivalent to a sequence of MOVSL but
	// for some reason MOVSL is really slow.
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVL\t(SI), CX")
		fmt.Fprintln(w, "\tADDL\t$4, SI")
		fmt.Fprintln(w, "\tMOVL\tCX, (DI)")
		fmt.Fprintln(w, "\tADDL\t$4, DI")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// zeroARM emits runtime·duffzero for arm as 128 MOVW.P stores of R0
// through R1, followed by RET.
func zeroARM(w io.Writer) {
	// R0: zero
	// R1: ptr to memory to be zeroed
	// R1 is updated as a side effect.
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R1)")
	}
	fmt.Fprintln(w, "\tRET")
}

// copyARM emits runtime·duffcopy for arm: 128 repetitions of a MOVW.P
// load from R1 into R0 and a MOVW.P store of R0 through R2.
func copyARM(w io.Writer) {
	// R0: scratch space
	// R1: ptr to source memory
	// R2: ptr to destination memory
	// R1 and R2 are updated as a side effect
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVW.P\t4(R1), R0")
		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R2)")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// zeroARM64 emits runtime·duffzero for arm64: 63 STP.P pair stores and
// then one final plain STP, for 64 pair stores in total.
func zeroARM64(w io.Writer) {
	// ZR: always zero
	// R20: ptr to memory to be zeroed
	// On return, R20 points to the last zeroed dword.
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 63; i++ {
		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R20)")
	}
	// The final store has no .P suffix, unlike the 63 before it.
	fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R20)")
	fmt.Fprintln(w, "\tRET")
}

// copyARM64 emits runtime·duffcopy for arm64: 64 repetitions of an LDP.P
// pair load from R20 and an STP.P pair store through R21.
func copyARM64(w io.Writer) {
	// R20: ptr to source memory
	// R21: ptr to destination memory
	// R26, R27 (aka REGTMP): scratch space
	// R20 and R21 are updated as a side effect
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")

	for i := 0; i < 64; i++ {
		fmt.Fprintln(w, "\tLDP.P\t16(R20), (R26, R27)")
		fmt.Fprintln(w, "\tSTP.P\t(R26, R27), 16(R21)")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// tagsPPC64x writes the build constraint line for the shared ppc64/ppc64le
// file, with a blank line before and after it.
func tagsPPC64x(w io.Writer) {
	fmt.Fprintln(w)
	fmt.Fprintln(w, "// +build ppc64 ppc64le")
	fmt.Fprintln(w)
}

// zeroPPC64x emits runtime·duffzero for ppc64x as 128 MOVDU stores of R0
// at offset 8 from R3, followed by RET.
func zeroPPC64x(w io.Writer) {
	// R0: always zero
	// R3 (aka REGRT1): ptr to memory to be zeroed - 8
	// On return, R3 points to the last zeroed dword.
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)")
	}
	fmt.Fprintln(w, "\tRET")
}

// copyPPC64x emits a runtime·duffcopy symbol for ppc64x whose body is a
// single UNDEF instruction.
func copyPPC64x(w io.Writer) {
	// duffcopy is not used on PPC64.
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	fmt.Fprintln(w, "\tUNDEF")
}

// tagsMIPS64x writes the build constraint line for the shared
// mips64/mips64le file, with a blank line before and after it.
func tagsMIPS64x(w io.Writer) {
	fmt.Fprintln(w)
	fmt.Fprintln(w, "// +build mips64 mips64le")
	fmt.Fprintln(w)
}

// zeroMIPS64x emits runtime·duffzero for mips64x: 128 repetitions of a
// store of R0 at offset 8 from R1 followed by adding 8 to R1.
func zeroMIPS64x(w io.Writer) {
	// R0: always zero
	// R1 (aka REGRT1): ptr to memory to be zeroed - 8
	// On return, R1 points to the last zeroed dword.
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVV\tR0, 8(R1)")
		fmt.Fprintln(w, "\tADDV\t$8, R1")
	}
	fmt.Fprintln(w, "\tRET")
}

// copyMIPS64x emits runtime·duffcopy for mips64x: 128 repetitions of a
// load from (R1) into R23 and a store of R23 to (R2), adding 8 to R1 and
// R2 after the load and store respectively.
func copyMIPS64x(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVV\t(R1), R23")
		fmt.Fprintln(w, "\tADDV\t$8, R1")
		fmt.Fprintln(w, "\tMOVV\tR23, (R2)")
		fmt.Fprintln(w, "\tADDV\t$8, R2")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// zeroRISCV64 emits runtime·duffzero for riscv64: 128 repetitions of a
// store of ZERO to (X10) followed by adding 8 to X10.
func zeroRISCV64(w io.Writer) {
	// ZERO: always zero
	// X10: ptr to memory to be zeroed
	// X10 is updated as a side effect.
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOV\tZERO, (X10)")
		fmt.Fprintln(w, "\tADD\t$8, X10")
	}
	fmt.Fprintln(w, "\tRET")
}

// copyRISCV64 emits runtime·duffcopy for riscv64: 128 repetitions of a
// load from (X10) into X31 and a store of X31 to (X11), adding 8 to X10
// and X11 after the load and store respectively.
func copyRISCV64(w io.Writer) {
	// X10: ptr to source memory
	// X11: ptr to destination memory
	// X10 and X11 are updated as a side effect
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOV\t(X10), X31")
		fmt.Fprintln(w, "\tADD\t$8, X10")
		fmt.Fprintln(w, "\tMOV\tX31, (X11)")
		fmt.Fprintln(w, "\tADD\t$8, X11")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}