// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The gen command generates Go code (in the parent directory) for all
// the architecture-specific opcodes, blocks, and rewrites.
package main

import (
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"log"
	"math/bits"
	"os"
	"path"
	"regexp"
	"runtime"
	"runtime/pprof"
	"runtime/trace"
	"sort"
	"strings"
	"sync"
)

// TODO: capitalize these types, so that we can more easily tell variable names
// apart from type names, and avoid awkward func parameters like "arch arch".

// arch describes one target architecture: its opcode/block definitions,
// register set, and the metadata needed to generate its tables.
type arch struct {
	name               string
	pkg                string // obj package to import for this arch.
	genfile            string // source file containing opcode code generation.
	ops                []opData
	blocks             []blockData
	regnames           []string
	ParamIntRegNames   string // space-separated names of integer parameter registers
	ParamFloatRegNames string // space-separated names of floating-point parameter registers
	gpregmask          regMask
	fpregmask          regMask
	fp32regmask        regMask
	fp64regmask        regMask
	specialregmask     regMask
	framepointerreg    int8
	linkreg            int8
	generic            bool // true for the architecture-independent "generic" op set
	imports            []string
}

// opData describes a single SSA opcode and the properties emitted into the
// generated opcodeTable.
type opData struct {
	name              string
	reg               regInfo
	asm               string
	typ               string // default result type
	aux               string
	rematerializeable bool
	argLength         int32  // number of arguments, if -1, then this operation has a variable number of arguments
	commutative       bool   // this operation is commutative on its first 2 arguments (e.g. addition)
	resultInArg0      bool   // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
	resultNotInArgs   bool   // outputs must not be allocated to the same registers as inputs
	clobberFlags      bool   // this op clobbers flags register
	needIntTemp       bool   // need a temporary free integer register
	call              bool   // is a function call
	tailCall          bool   // is a tail call
	nilCheck          bool   // this op is a nil check on arg0
	faultOnNilArg0    bool   // this op will fault if arg0 is nil (and aux encodes a small offset)
	faultOnNilArg1    bool   // this op will fault if arg1 is nil (and aux encodes a small offset)
	hasSideEffects    bool   // for "reasons", not to be eliminated. E.g., atomic store, #19182.
	zeroWidth         bool   // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width.
	unsafePoint       bool   // this op is an unsafe point, i.e. not safe for async preemption
	symEffect         string // effect this op has on symbol in aux
	scale             uint8  // amd64/386 indexed load scale
}

// blockData describes one kind of SSA basic block ending.
type blockData struct {
	name     string // the suffix for this block ("EQ", "LT", etc.)
	controls int    // the number of control values this type of block requires
	aux      string // the type of the Aux/AuxInt value, if any
}

type regInfo struct {
	// inputs[i] encodes the set of registers allowed for the i'th input.
	// Inputs that don't use registers (flags, memory, etc.) should be 0.
	inputs []regMask
	// clobbers encodes the set of registers that are overwritten by
	// the instruction (other than the output registers).
	clobbers regMask
	// outputs[i] encodes the set of registers allowed for the i'th output.
	outputs []regMask
}

// regMask is a bit set of registers; bit i corresponds to regnames[i].
type regMask uint64

// regMaskComment returns a trailing Go comment (" // R1 R2 ...") naming the
// registers set in r, or "" if r is empty. Used to annotate generated masks.
func (a arch) regMaskComment(r regMask) string {
	var buf strings.Builder
	for i := uint64(0); r != 0; i++ {
		if r&1 != 0 {
			if buf.Len() == 0 {
				buf.WriteString(" //")
			}
			buf.WriteString(" ")
			buf.WriteString(a.regnames[i])
		}
		r >>= 1
	}
	return buf.String()
}

// archs is populated by per-architecture init functions elsewhere in this package.
var archs []arch

var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
var tracefile = flag.String("trace", "", "write trace to `file`")

// main runs all code-generation tasks (opcode tables, allocators, and the
// per-architecture rewrite rules) concurrently, with optional CPU/memory
// profiling and execution tracing controlled by flags.
func main() {
	flag.Parse()
	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal("could not create CPU profile: ", err)
		}
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal("could not start CPU profile: ", err)
		}
		defer pprof.StopCPUProfile()
	}
	if *tracefile != "" {
		f, err := os.Create(*tracefile)
		if err != nil {
			log.Fatalf("failed to create trace output file: %v", err)
		}
		defer func() {
			if err := f.Close(); err != nil {
				log.Fatalf("failed to close trace file: %v", err)
			}
		}()

		if err := trace.Start(f); err != nil {
			log.Fatalf("failed to start trace: %v", err)
		}
		defer trace.Stop()
	}

	// Sort by name so the generated output is deterministic.
	sort.Sort(ArchsByName(archs))

	// The generate tasks are run concurrently, since they are CPU-intensive
	// that can easily make use of many cores on a machine.
	//
	// Note that there is no limit on the concurrency at the moment. On a
	// four-core laptop at the time of writing, peak RSS usually reaches
	// ~200MiB, which seems doable by practically any machine nowadays. If
	// that stops being the case, we can cap this func to a fixed number of
	// architectures being generated at once.

	tasks := []func(){
		genOp,
		genAllocators,
	}
	for _, a := range archs {
		a := a // the funcs are ran concurrently at a later time
		tasks = append(tasks, func() {
			genRules(a)
			genSplitLoadRules(a)
			genLateLowerRules(a)
		})
	}
	var wg sync.WaitGroup
	for _, task := range tasks {
		task := task
		wg.Add(1)
		go func() {
			task()
			wg.Done()
		}()
	}
	wg.Wait()

	if *memprofile != "" {
		f, err := os.Create(*memprofile)
		if err != nil {
			log.Fatal("could not create memory profile: ", err)
		}
		defer f.Close()
		runtime.GC() // get up-to-date statistics
		if err := pprof.WriteHeapProfile(f); err != nil {
			log.Fatal("could not write memory profile: ", err)
		}
	}
}

// genOp generates ../opGen.go: the Block*/Op* enumerations, the opcodeTable
// with per-op flags and register constraints, and the per-architecture
// register tables. It then gofmts the result, writes it out, and finally
// sanity-checks that each arch's genfile mentions every one of its opcodes.
func genOp() {
	w := new(bytes.Buffer)
	fmt.Fprintf(w, "// Code generated from _gen/*Ops.go using 'go generate'; DO NOT EDIT.\n")
	fmt.Fprintln(w)
	fmt.Fprintln(w, "package ssa")

	fmt.Fprintln(w, "import (")
	fmt.Fprintln(w, "\"github.com/go-asm/go/cmd/obj\"")
	for _, a := range archs {
		if a.pkg != "" {
			fmt.Fprintf(w, "%q\n", a.pkg)
		}
	}
	fmt.Fprintln(w, ")")

	// generate Block* declarations
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "BlockInvalid BlockKind = iota")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, d := range a.blocks {
			fmt.Fprintf(w, "Block%s%s\n", a.Name(), d.name)
		}
	}
	fmt.Fprintln(w, ")")

	// generate block kind string method
	fmt.Fprintln(w, "var blockString = [...]string{")
	fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, b := range a.blocks {
			fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.name)
		}
	}
	fmt.Fprintln(w, "}")
	fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}")

	// generate block kind auxint method
	fmt.Fprintln(w, "func (k BlockKind) AuxIntType() string {")
	fmt.Fprintln(w, "switch k {")
	for _, a := range archs {
		for _, b := range a.blocks {
			if b.auxIntType() == "invalid" {
				continue
			}
			fmt.Fprintf(w, "case Block%s%s: return \"%s\"\n", a.Name(), b.name, b.auxIntType())
		}
	}
	fmt.Fprintln(w, "}")
	fmt.Fprintln(w, "return \"\"")
	fmt.Fprintln(w, "}")

	// generate Op* declarations
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "OpInvalid Op = iota") // make sure OpInvalid is 0.
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, v := range a.ops {
			if v.name == "Invalid" {
				continue
			}
			fmt.Fprintf(w, "Op%s%s\n", a.Name(), v.name)
		}
	}
	fmt.Fprintln(w, ")")

	// generate OpInfo table
	fmt.Fprintln(w, "var opcodeTable = [...]opInfo{")
	fmt.Fprintln(w, " { name: \"OpInvalid\" },")
	for _, a := range archs {
		fmt.Fprintln(w)

		pkg := path.Base(a.pkg)
		for _, v := range a.ops {
			if v.name == "Invalid" {
				continue
			}
			fmt.Fprintln(w, "{")
			fmt.Fprintf(w, "name:\"%s\",\n", v.name)

			// flags
			if v.aux != "" {
				fmt.Fprintf(w, "auxType: aux%s,\n", v.aux)
			}
			fmt.Fprintf(w, "argLen: %d,\n", v.argLength)

			if v.rematerializeable {
				// Rematerializeable ops are re-issued freely by the register
				// allocator, so they must not clobber anything.
				if v.reg.clobbers != 0 {
					log.Fatalf("%s is rematerializeable and clobbers registers", v.name)
				}
				if v.clobberFlags {
					log.Fatalf("%s is rematerializeable and clobbers flags", v.name)
				}
				fmt.Fprintln(w, "rematerializeable: true,")
			}
			if v.commutative {
				fmt.Fprintln(w, "commutative: true,")
			}
			if v.resultInArg0 {
				fmt.Fprintln(w, "resultInArg0: true,")
				// OpConvert's register mask is selected dynamically,
				// so don't try to check it in the static table.
				if v.name != "Convert" && v.reg.inputs[0] != v.reg.outputs[0] {
					log.Fatalf("%s: input[0] and output[0] must use the same registers for %s", a.name, v.name)
				}
				if v.name != "Convert" && v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
					log.Fatalf("%s: input[1] and output[0] must use the same registers for %s", a.name, v.name)
				}
			}
			if v.resultNotInArgs {
				fmt.Fprintln(w, "resultNotInArgs: true,")
			}
			if v.clobberFlags {
				fmt.Fprintln(w, "clobberFlags: true,")
			}
			if v.needIntTemp {
				fmt.Fprintln(w, "needIntTemp: true,")
			}
			if v.call {
				fmt.Fprintln(w, "call: true,")
			}
			if v.tailCall {
				fmt.Fprintln(w, "tailCall: true,")
			}
			if v.nilCheck {
				fmt.Fprintln(w, "nilCheck: true,")
			}
			if v.faultOnNilArg0 {
				fmt.Fprintln(w, "faultOnNilArg0: true,")
				if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
					log.Fatalf("faultOnNilArg0 with aux %s not allowed", v.aux)
				}
			}
			if v.faultOnNilArg1 {
				fmt.Fprintln(w, "faultOnNilArg1: true,")
				if v.aux != "Sym" && v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
					log.Fatalf("faultOnNilArg1 with aux %s not allowed", v.aux)
				}
			}
			if v.hasSideEffects {
				fmt.Fprintln(w, "hasSideEffects: true,")
			}
			if v.zeroWidth {
				fmt.Fprintln(w, "zeroWidth: true,")
			}
			if v.unsafePoint {
				fmt.Fprintln(w, "unsafePoint: true,")
			}
			// A symEffect is required exactly when the aux value names a symbol.
			needEffect := strings.HasPrefix(v.aux, "Sym")
			if v.symEffect != "" {
				if !needEffect {
					log.Fatalf("symEffect with aux %s not allowed", v.aux)
				}
				fmt.Fprintf(w, "symEffect: Sym%s,\n", strings.Replace(v.symEffect, ",", "|Sym", -1))
			} else if needEffect {
				log.Fatalf("symEffect needed for aux %s", v.aux)
			}
			if a.name == "generic" {
				fmt.Fprintln(w, "generic:true,")
				fmt.Fprintln(w, "},") // close op
				// generic ops have no reg info or asm
				continue
			}
			if v.asm != "" {
				fmt.Fprintf(w, "asm: %s.A%s,\n", pkg, v.asm)
			}
			if v.scale != 0 {
				fmt.Fprintf(w, "scale: %d,\n", v.scale)
			}
			fmt.Fprintln(w, "reg:regInfo{")

			// Compute input allocation order. We allocate from the
			// most to the least constrained input. This order guarantees
			// that we will always be able to find a register.
			var s []intPair
			for i, r := range v.reg.inputs {
				if r != 0 {
					s = append(s, intPair{countRegs(r), i})
				}
			}
			if len(s) > 0 {
				sort.Sort(byKey(s))
				fmt.Fprintln(w, "inputs: []inputInfo{")
				for _, p := range s {
					r := v.reg.inputs[p.val]
					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
				}
				fmt.Fprintln(w, "},")
			}

			if v.reg.clobbers > 0 {
				fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
			}

			// reg outputs
			s = s[:0]
			for i, r := range v.reg.outputs {
				s = append(s, intPair{countRegs(r), i})
			}
			if len(s) > 0 {
				sort.Sort(byKey(s))
				fmt.Fprintln(w, "outputs: []outputInfo{")
				for _, p := range s {
					r := v.reg.outputs[p.val]
					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
				}
				fmt.Fprintln(w, "},")
			}
			fmt.Fprintln(w, "},") // close reg info
			fmt.Fprintln(w, "},") // close op
		}
	}
	fmt.Fprintln(w, "}")

	fmt.Fprintln(w, "func (o Op) Asm() obj.As {return opcodeTable[o].asm}")
	fmt.Fprintln(w, "func (o Op) Scale() int16 {return int16(opcodeTable[o].scale)}")

	// generate op string method
	fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }")

	fmt.Fprintln(w, "func (o Op) SymEffect() SymEffect { return opcodeTable[o].symEffect }")
	fmt.Fprintln(w, "func (o Op) IsCall() bool { return opcodeTable[o].call }")
	fmt.Fprintln(w, "func (o Op) IsTailCall() bool { return opcodeTable[o].tailCall }")
	fmt.Fprintln(w, "func (o Op) HasSideEffects() bool { return opcodeTable[o].hasSideEffects }")
	fmt.Fprintln(w, "func (o Op) UnsafePoint() bool { return opcodeTable[o].unsafePoint }")
	fmt.Fprintln(w, "func (o Op) ResultInArg0() bool { return opcodeTable[o].resultInArg0 }")

	// generate registers
	for _, a := range archs {
		if a.generic {
			continue
		}
		fmt.Fprintf(w, "var registers%s = [...]Register {\n", a.name)
		var gcRegN int
		num := map[string]int8{}
		for i, r := range a.regnames {
			num[r] = int8(i)
			pkg := a.pkg[len("github.com/go-asm/go/cmd/obj/"):]
			var objname string // name in github.com/go-asm/go/cmd/obj/$ARCH
			switch r {
			case "SB":
				// SB isn't a real register. github.com/go-asm/go/cmd/obj expects 0 in this case.
				objname = "0"
			case "SP":
				objname = pkg + ".REGSP"
			case "g":
				objname = pkg + ".REGG"
			default:
				objname = pkg + ".REG_" + r
			}
			// Assign a GC register map index to registers
			// that may contain pointers.
			gcRegIdx := -1
			if a.gpregmask&(1<<uint(i)) != 0 {
				gcRegIdx = gcRegN
				gcRegN++
			}
			fmt.Fprintf(w, " {%d, %s, %d, \"%s\"},\n", i, objname, gcRegIdx, r)
		}
		// parameterRegisterList resolves a space-separated list of register
		// names to their indices; it consumes entries from num so a repeated
		// name (or an unknown one) is a fatal error.
		parameterRegisterList := func(paramNamesString string) []int8 {
			paramNamesString = strings.TrimSpace(paramNamesString)
			if paramNamesString == "" {
				return nil
			}
			paramNames := strings.Split(paramNamesString, " ")
			var paramRegs []int8
			for _, regName := range paramNames {
				if regName == "" {
					// forgive extra spaces
					continue
				}
				if regNum, ok := num[regName]; ok {
					paramRegs = append(paramRegs, regNum)
					delete(num, regName)
				} else {
					log.Fatalf("parameter register %s for architecture %s not a register name (or repeated in parameter list)", regName, a.name)
				}
			}
			return paramRegs
		}

		paramIntRegs := parameterRegisterList(a.ParamIntRegNames)
		paramFloatRegs := parameterRegisterList(a.ParamFloatRegNames)

		if gcRegN > 32 {
			// Won't fit in a uint32 mask.
			log.Fatalf("too many GC registers (%d > 32) on %s", gcRegN, a.name)
		}
		fmt.Fprintln(w, "}")
		fmt.Fprintf(w, "var paramIntReg%s = %#v\n", a.name, paramIntRegs)
		fmt.Fprintf(w, "var paramFloatReg%s = %#v\n", a.name, paramFloatRegs)
		fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask)
		fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
		if a.fp32regmask != 0 {
			fmt.Fprintf(w, "var fp32RegMask%s = regMask(%d)\n", a.name, a.fp32regmask)
		}
		if a.fp64regmask != 0 {
			fmt.Fprintf(w, "var fp64RegMask%s = regMask(%d)\n", a.name, a.fp64regmask)
		}
		fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask)
		fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
		fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg)
	}

	// gofmt result
	b := w.Bytes()
	var err error
	b, err = format.Source(b)
	if err != nil {
		// Dump the unformatted source so the syntax error can be located.
		fmt.Printf("%s\n", w.Bytes())
		panic(err)
	}

	if err := os.WriteFile("../opGen.go", b, 0666); err != nil {
		log.Fatalf("can't write output: %v\n", err)
	}

	// Check that the arch genfile handles all the arch-specific opcodes.
	// This is very much a hack, but it is better than nothing.
	//
	// Do a single regexp pass to record all ops being handled in a map, and
	// then compare that with the ops list. This is much faster than one
	// regexp pass per opcode.
	for _, a := range archs {
		if a.genfile == "" {
			continue
		}

		pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name)
		rxOp, err := regexp.Compile(pattern)
		if err != nil {
			log.Fatalf("bad opcode regexp %s: %v", pattern, err)
		}

		src, err := os.ReadFile(a.genfile)
		if err != nil {
			log.Fatalf("can't read %s: %v", a.genfile, err)
		}
		seen := make(map[string]bool, len(a.ops))
		for _, m := range rxOp.FindAllSubmatch(src, -1) {
			seen[string(m[1])] = true
		}
		for _, op := range a.ops {
			if !seen[op.name] {
				log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile)
			}
		}
	}
}

// Name returns the name of the architecture for use in Op* and Block* enumerations.
func (a arch) Name() string {
	s := a.name
	if s == "generic" {
		s = ""
	}
	return s
}

// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}

// for sorting a pair of integers by key
type intPair struct {
	key, val int
}

// byKey implements sort.Interface over intPair slices, ordering by key.
type byKey []intPair

func (a byKey) Len() int           { return len(a) }
func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }

// ArchsByName implements sort.Interface, ordering architectures by name.
type ArchsByName []arch

func (x ArchsByName) Len() int           { return len(x) }
func (x ArchsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name }