// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// Smallest possible faulting page at address zero.
const minZeroPage = 4096

// ssaRegToReg maps ssa register numbers to obj register numbers.
var ssaRegToReg = []int16{
	x86.REG_AX,
	x86.REG_CX,
	x86.REG_DX,
	x86.REG_BX,
	x86.REG_SP,
	x86.REG_BP,
	x86.REG_SI,
	x86.REG_DI,
	x86.REG_R8,
	x86.REG_R9,
	x86.REG_R10,
	x86.REG_R11,
	x86.REG_R12,
	x86.REG_R13,
	x86.REG_R14,
	x86.REG_R15,
	x86.REG_X0,
	x86.REG_X1,
	x86.REG_X2,
	x86.REG_X3,
	x86.REG_X4,
	x86.REG_X5,
	x86.REG_X6,
	x86.REG_X7,
	x86.REG_X8,
	x86.REG_X9,
	x86.REG_X10,
	x86.REG_X11,
	x86.REG_X12,
	x86.REG_X13,
	x86.REG_X14,
	x86.REG_X15,
	0, // SB isn't a real register. We fill an Addr.Reg field with 0 in this case.
}

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(op obj.As, dest, src int16) *obj.Prog {
	p := gc.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzAddSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetLineno(v.Line)
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW,
		ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU,
		ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW,
		ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := gc.SSARegNum(v.Args[1])

		// CPU faults upon signed overflow, which occurs when most
		// negative int is divided by -1.
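		// (For the signed ops below, c compares the divisor against -1 and j
		// conditionally branches around the divide; the -1 special case is
		// patched in after the divide instruction has been emitted.)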
		var j *obj.Prog
		if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
			v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ ||
			v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ:
				c = gc.Prog(x86.ACMPQ)
				j = gc.Prog(x86.AJEQ)
				// go ahead and sign extend to save doing it later
				gc.Prog(x86.ACQO)

			case ssa.OpAMD64DIVL, ssa.OpAMD64MODL:
				c = gc.Prog(x86.ACMPL)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACDQ)

			case ssa.OpAMD64DIVW, ssa.OpAMD64MODW:
				c = gc.Prog(x86.ACMPW)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH

		}

		// for unsigned ints, we sign extend by setting DX = 0
		// signed ints were sign extended above
		if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU ||
			v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU ||
			v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU {
			c := gc.Prog(x86.AXORQ)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := gc.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
				v.Op == ssa.OpAMD64DIVW {
				// n * -1 = -n
				n = gc.Prog(x86.ANEGQ)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = gc.Prog(x86.AXORQ)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
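		// (That is: ADDQ computes the low 64 bits and sets CF on overflow,
		// then RCRQ $1 rotates CF back in as the top bit, so the unsigned
		// average is computed without losing the 65th bit.)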
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p = gc.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := gc.SSARegNum(v)
		a := gc.SSARegNum(v.Args[0])
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc.
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := gc.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
		ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}

		// Constant into AX
		p := gc.Prog(moveByType(v.Type))
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX

		p = gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		p := gc.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64LEAQ:
		p := gc.Prog(x86.ALEAQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]))
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
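		// (A non-nil Aux here is the mark set by ssaMarkMoves above.)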
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 8
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 4
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 2
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
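		// SP cannot be encoded as an index register in x86-64 addressing,
		// so if the index happens to be SP, swap it into the base slot.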
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = gc.Prog(x86.AADDQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = adj
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Unimplementedf("MOVOconst can only do constant=0")
		}
		r := gc.SSARegNum(v)
		opregreg(x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
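		// (A copy of the memory value generates no instructions; otherwise a
		// reg->reg move is emitted only when source and destination differ.)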
		if v.Type.IsMemory() {
			return
		}
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v)
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		n, off := gc.AutoVar(v.Args[0])
		p.From.Type = obj.TYPE_MEM
		p.From.Node = n
		p.From.Sym = gc.Linksym(n.Sym)
		p.From.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.From.Name = obj.NAME_PARAM
			p.From.Offset += n.Xoffset
		} else {
			p.From.Name = obj.NAME_AUTO
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		n, off := gc.AutoVar(v)
		p.To.Type = obj.TYPE_MEM
		p.To.Node = n
		p.To.Sym = gc.Linksym(n.Sym)
		p.To.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.To.Name = obj.NAME_PARAM
			p.To.Offset += n.Xoffset
		} else {
			p.To.Name = obj.NAME_AUTO
		}
	case ssa.OpPhi:
		// just check to make sure regalloc and stackalloc did it right
		if v.Type.IsMemory() {
			return
		}
		f := v.Block.Func
		loc := f.RegAlloc[v.ID]
		for _, a := range v.Args {
			if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead?
				v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func)
			}
		}
	case ssa.OpInitMem:
		// memory arg needs no code
	case ssa.OpArg:
		// input args need no code
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Output is hardwired to DX only,
		// and DX contains the closure pointer on
		// closure entry, and this "instruction"
		// is scheduled to the very beginning
		// of the entry block.
	case ssa.OpAMD64LoweredGetG:
		r := gc.SSARegNum(v)
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
		ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
		ssa.OpAMD64SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpSP, ssa.OpSB:
		// nothing to do
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64SETNEF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.AMOVSQ)
	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))
	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))
	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))
	case ssa.OpKeepAlive:
		if !v.Args[0].Type.IsPtrShaped() {
			v.Fatalf("keeping non-pointer alive %v", v.Args[0])
		}
		n, off := gc.AutoVar(v.Args[0])
		if n == nil {
			v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0])
		}
		if off != 0 {
			v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off)
		}
		gc.Gvarlive(n)
	case ssa.OpAMD64LoweredNilCheck:
		// Optimization - if the subsequent block has a load or store
		// at the same address, we don't need to issue this instruction.
		mem := v.Args[1]
		for _, w := range v.Block.Succs[0].Block().Values {
			if w.Op == ssa.OpPhi {
				if w.Type.IsMemory() {
					mem = w
				}
				continue
			}
			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
				// w doesn't use a store - can't be a memory op.
				continue
			}
			if w.Args[len(w.Args)-1] != mem {
				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
			}
			switch w.Op {
			case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload,
				ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
				ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
				ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
				ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore:
				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
				off := ssa.ValAndOff(v.AuxInt).Off()
				if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			}
			if w.Type.IsMemory() {
				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
					// these ops are OK
					mem = w
					continue
				}
				// We can't delay the nil check past the next store.
				break
			}
		}
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
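		// (If the pointer is nil, the byte load touches the unmapped zero page
		// and faults; the runtime converts that fault into a nil-pointer panic.)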
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
			gc.Warnl(v.Line, "generated nil check")
		}
	default:
		v.Unimplementedf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	s.SetLineno(b.Line)

	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := gc.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = gc.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		gc.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := gc.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))

	case ssa.BlockAMD64EQF:
		gc.SSAGenFPJump(s, b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		gc.SSAGenFPJump(s, b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		likely := b.Likely
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = gc.Prog(jmp.invasm)
			likely *= -1
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := gc.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

		// liblink reorders the instruction stream as it sees fit.
		// Pass along what we know so liblink can make use of it.
		// TODO: Once we've fully switched to SSA,
		// make liblink leave our output alone.
		switch likely {
		case ssa.BranchUnlikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
		case ssa.BranchLikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 1
		}

	default:
		b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}