github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// Smallest possible faulting page at address zero.
const minZeroPage = 4096

// ssaRegToReg maps ssa register numbers to obj register numbers.
var ssaRegToReg = []int16{
	x86.REG_AX,
	x86.REG_CX,
	x86.REG_DX,
	x86.REG_BX,
	x86.REG_SP,
	x86.REG_BP,
	x86.REG_SI,
	x86.REG_DI,
	x86.REG_R8,
	x86.REG_R9,
	x86.REG_R10,
	x86.REG_R11,
	x86.REG_R12,
	x86.REG_R13,
	x86.REG_R14,
	x86.REG_R15,
	x86.REG_X0,
	x86.REG_X1,
	x86.REG_X2,
	x86.REG_X3,
	x86.REG_X4,
	x86.REG_X5,
	x86.REG_X6,
	x86.REG_X7,
	x86.REG_X8,
	x86.REG_X9,
	x86.REG_X10,
	x86.REG_X11,
	x86.REG_X12,
	x86.REG_X13,
	x86.REG_X14,
	x86.REG_X15,
	0, // SB isn't a real register. We fill an Addr.Reg field with 0 in this case.
}

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
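// For example, opregreg(x86.AADDQ, x86.REG_AX, x86.REG_BX) emits
//	ADDQ BX, AX
// (in Go assembler syntax the source operand comes first).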
func opregreg(op obj.As, dest, src int16) *obj.Prog {
	p := gc.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD.
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzAddSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetLineno(v.Line)
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := gc.SSARegNum(v.Args[1])

		// Zero extend dividend.
		c := gc.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
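		//
		// Roughly, the code emitted below is:
		//	CMP{Q,L,W} divisor, $-1
		//	JEQ  fixup
		//	<sign-extend AX into DX: CQO/CDQ/CWD>
		//	IDIV{Q,L,W} divisor
		//	JMP  done
		// fixup:
		//	NEGQ AX     // quotient of n / -1
		//	XORL DX, DX // remainder of n % -1 is 0
		// done: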
		r := gc.SSARegNum(v.Args[1])

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var c *obj.Prog
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			c = gc.Prog(x86.ACMPQ)
		case ssa.OpAMD64DIVL:
			c = gc.Prog(x86.ACMPL)
		case ssa.OpAMD64DIVW:
			c = gc.Prog(x86.ACMPW)
		}
		c.From.Type = obj.TYPE_REG
		c.From.Reg = r
		c.To.Type = obj.TYPE_CONST
		c.To.Offset = -1
		j1 := gc.Prog(x86.AJEQ)
		j1.To.Type = obj.TYPE_BRANCH

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			gc.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			gc.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			gc.Prog(x86.ACWD)
		}

		// Issue divide.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		// Skip over -1 fixup code.
		j2 := gc.Prog(obj.AJMP)
		j2.To.Type = obj.TYPE_BRANCH

		// Issue -1 fixup code.
		// n / -1 = -n
		n1 := gc.Prog(x86.ANEGQ)
		n1.To.Type = obj.TYPE_REG
		n1.To.Reg = x86.REG_AX

		// n % -1 == 0
		n2 := gc.Prog(x86.AXORL)
		n2.From.Type = obj.TYPE_REG
		n2.From.Reg = x86.REG_DX
		n2.To.Type = obj.TYPE_REG
		n2.To.Reg = x86.REG_DX

		// TODO(khr): issue only the -1 fixup code we need.
		// For instance, if only the quotient is used, no point in zeroing the remainder.

		j1.To.Val = n1
		j2.To.Val = s.Pc()

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
		// The frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant.
		// SSA rewrites generate the 64 bit versions.

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p = gc.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := gc.SSARegNum(v)
		a := gc.SSARegNum(v.Args[0])
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc.
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes a binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := gc.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		p := gc.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]))
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore,
		ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 8
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 4
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 2
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = gc.Prog(x86.AADDQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = adj
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Unimplementedf("MOVOconst can only do constant=0")
		}
		r := gc.SSARegNum(v)
		opregreg(x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v)
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		n, off := gc.AutoVar(v.Args[0])
		p.From.Type = obj.TYPE_MEM
		p.From.Node = n
		p.From.Sym = gc.Linksym(n.Sym)
		p.From.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.From.Name = obj.NAME_PARAM
			p.From.Offset += n.Xoffset
		} else {
			p.From.Name = obj.NAME_AUTO
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		n, off := gc.AutoVar(v)
		p.To.Type = obj.TYPE_MEM
		p.To.Node = n
		p.To.Sym = gc.Linksym(n.Sym)
		p.To.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.To.Name = obj.NAME_PARAM
			p.To.Offset += n.Xoffset
		} else {
			p.To.Name = obj.NAME_AUTO
		}
	case ssa.OpPhi:
		gc.CheckLoweredPhi(v)
	case ssa.OpInitMem:
		// memory arg needs no code
	case ssa.OpArg:
		// input args need no code
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := gc.SSARegNum(v)
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum0(v)
	case ssa.OpAMD64SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpSP, ssa.OpSB:
		// nothing to do
	case ssa.OpSelect0, ssa.OpSelect1:
		// nothing to do
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64SETNEF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.AMOVSQ)
	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))
	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))
	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))
	case ssa.OpKeepAlive:
		if !v.Args[0].Type.IsPtrShaped() {
			v.Fatalf("keeping non-pointer alive %v", v.Args[0])
		}
		n, off := gc.AutoVar(v.Args[0])
		if n == nil {
			v.Fatalf("KeepAlive with non-spilled value %s %s", v, v.Args[0])
		}
		if off != 0 {
			v.Fatalf("KeepAlive with non-zero offset spill location %s:%d", n, off)
		}
		gc.Gvarlive(n)
	case ssa.OpAMD64LoweredNilCheck:
		// Optimization - if the subsequent block has a load or store
		// at the same address, we don't need to issue this instruction.
		mem := v.Args[1]
		for _, w := range v.Block.Succs[0].Block().Values {
			if w.Op == ssa.OpPhi {
				if w.Type.IsMemory() {
					mem = w
				}
				continue
			}
			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
				// w doesn't use a store - can't be a memory op.
				continue
			}
			if w.Args[len(w.Args)-1] != mem {
				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
			}
			switch w.Op {
			case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload,
				ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
				ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
				ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
				ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore,
				ssa.OpAMD64MOVQatomicload, ssa.OpAMD64MOVLatomicload:
				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
				off := ssa.ValAndOff(v.AuxInt).Off()
				if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			}
			if w.Type.IsMemory() || w.Type.IsTuple() && w.Type.FieldType(1).IsMemory() {
				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
					// these ops are OK
					mem = w
					continue
				}
				// We can't delay the nil check past the next store.
				break
			}
		}
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
			gc.Warnl(v.Line, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum0(v)
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := gc.SSARegNum0(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	default:
		v.Unimplementedf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	s.SetLineno(b.Line)

	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := gc.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = gc.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		gc.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := gc.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))

	case ssa.BlockAMD64EQF:
		gc.SSAGenFPJump(s, b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		gc.SSAGenFPJump(s, b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		likely := b.Likely
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = gc.Prog(jmp.invasm)
			likely *= -1
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := gc.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

		// liblink reorders the instruction stream as it sees fit.
		// Pass along what we know so liblink can make use of it.
		// TODO: Once we've fully switched to SSA,
		// make liblink leave our output alone.
		switch likely {
		case ssa.BranchUnlikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
		case ssa.BranchLikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 1
		}

	default:
		b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}