github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// Smallest possible faulting page at address zero.
const minZeroPage = 4096

// ssaRegToReg maps ssa register numbers to obj register numbers.
var ssaRegToReg = []int16{
	x86.REG_AX,
	x86.REG_CX,
	x86.REG_DX,
	x86.REG_BX,
	x86.REG_SP,
	x86.REG_BP,
	x86.REG_SI,
	x86.REG_DI,
	x86.REG_R8,
	x86.REG_R9,
	x86.REG_R10,
	x86.REG_R11,
	x86.REG_R12,
	x86.REG_R13,
	x86.REG_R14,
	x86.REG_R15,
	x86.REG_X0,
	x86.REG_X1,
	x86.REG_X2,
	x86.REG_X3,
	x86.REG_X4,
	x86.REG_X5,
	x86.REG_X6,
	x86.REG_X7,
	x86.REG_X8,
	x86.REG_X9,
	x86.REG_X10,
	x86.REG_X11,
	x86.REG_X12,
	x86.REG_X13,
	x86.REG_X14,
	x86.REG_X15,
	0, // SB isn't a real register. We fill an Addr.Reg field with 0 in this case.
}

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVBconst || v.Op == ssa.OpAMD64MOVWconst || v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with a 1-byte opcode,
		// so use movups, which has a 2-byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
	panic("bad register type")
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(op obj.As, dest, src int16) *obj.Prog {
	p := gc.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetLineno(v.Line)
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL, ssa.OpAMD64ADDW, ssa.OpAMD64ADDB:
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic, symmetric
	case ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ANDW, ssa.OpAMD64ANDB,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL, ssa.OpAMD64ORW, ssa.OpAMD64ORB,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL, ssa.OpAMD64XORW, ssa.OpAMD64XORB,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL, ssa.OpAMD64MULW, ssa.OpAMD64MULB,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64PXOR:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v.Args[1])
		if x != r && y != r {
			opregreg(moveByType(v.Type), r, x)
			x = r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		if x == r {
			p.From.Reg = y
		} else {
			p.From.Reg = x
		}
	// 2-address opcode arithmetic, not symmetric
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL, ssa.OpAMD64SUBW, ssa.OpAMD64SUBB:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v.Args[1])
		var neg bool
		if y == r {
			// compute -(y-x) instead
			x, y = y, x
			neg = true
		}
		if x != r {
			opregreg(moveByType(v.Type), r, x)
		}
		opregreg(v.Op.Asm(), r, y)

		if neg {
			if v.Op == ssa.OpAMD64SUBQ {
				p := gc.Prog(x86.ANEGQ)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
			} else { // Avoids partial register write
				p := gc.Prog(x86.ANEGL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
			}
		}
	case ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v.Args[1])
		if y == r && x != r {
			// r/y := x op r/y, need to preserve x and rewrite to
			// r/y := r/y op x15
			x15 := int16(x86.REG_X15)
			// register move y to x15
			// register move x to y
			// rename y with x15
			opregreg(moveByType(v.Type), x15, y)
			opregreg(moveByType(v.Type), r, x)
			y = x15
		} else if x != r {
			opregreg(moveByType(v.Type), r, x)
		}
		opregreg(v.Op.Asm(), r, y)

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW,
		ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU,
		ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW,
		ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := gc.SSARegNum(v.Args[1])

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
			v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ ||
			v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ:
				c = gc.Prog(x86.ACMPQ)
				j = gc.Prog(x86.AJEQ)
				// go ahead and sign extend to save doing it later
				gc.Prog(x86.ACQO)

			case ssa.OpAMD64DIVL, ssa.OpAMD64MODL:
				c = gc.Prog(x86.ACMPL)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACDQ)

			case ssa.OpAMD64DIVW, ssa.OpAMD64MODW:
				c = gc.Prog(x86.ACMPW)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH

		}

		// for unsigned ints, we zero-extend by setting DX = 0
		// signed ints were sign extended above
		if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU ||
			v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU ||
			v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU {
			c := gc.Prog(x86.AXORQ)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := gc.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
				v.Op == ssa.OpAMD64DIVW {
				// n * -1 = -n
				n = gc.Prog(x86.ANEGQ)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = gc.Prog(x86.AXORQ)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v.Args[1])
		if x != r && y != r {
			opregreg(moveByType(v.Type), r, x)
			x = r
		}
		p := gc.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		if x == r {
			p.From.Reg = y
		} else {
			p.From.Reg = x
		}
		p = gc.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB:
		x := gc.SSARegNum(v.Args[0])
		r := gc.SSARegNum(v)
		if x != r {
			if r == x86.REG_CX {
				v.Fatalf("can't implement %s, target and shift both in CX", v.LongString())
			}
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1]) // should be CX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst, ssa.OpAMD64ADDWconst, ssa.OpAMD64ADDBconst:
		r := gc.SSARegNum(v)
		a := gc.SSARegNum(v.Args[0])
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// The software optimization manual recommends add $1,reg,
				// but inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			} else if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			} else {
				p := gc.Prog(v.Op.Asm())
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := gc.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
		ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		// Arg0 is in/out, move in to out if not already same
		if r != x {
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

		// Constant into AX, after arg0 movement in case arg0 is in AX
		p := gc.Prog(moveByType(v.Type))
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX

		p = gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst:
		r := gc.SSARegNum(v)
		x := gc.SSARegNum(v.Args[0])
		if r != x {
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// instead of using the MOVQ above.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = gc.SSARegNum(v.Args[0])
	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64SUBWconst, ssa.OpAMD64SUBBconst:
		x := gc.SSARegNum(v.Args[0])
		r := gc.SSARegNum(v)
		// We have a 3-op add (lea), so transforming a = b - const into
		// a = b + (-const) saves us 1 instruction. We can't fit
		// -(-1 << 31) into a 4-byte offset in lea.
		// We handle the 2-address case just fine below.
		if v.AuxInt == -1<<31 || x == r {
			if x != r {
				// This code compensates for the fact that the register allocator
				// doesn't understand 2-address instructions yet. TODO: fix that.
				p := gc.Prog(moveByType(v.Type))
				p.From.Type = obj.TYPE_REG
				p.From.Reg = x
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else if x == r && v.AuxInt == -1 {
			var asm obj.As
			// x = x - (-1) is the same as x++
			// See OpAMD64ADDQconst comments about inc vs add $1,reg
			if v.Op == ssa.OpAMD64SUBQconst {
				asm = x86.AINCQ
			} else {
				asm = x86.AINCL
			}
			p := gc.Prog(asm)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else if x == r && v.AuxInt == 1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64SUBQconst {
				asm = x86.ADECQ
			} else {
				asm = x86.ADECL
			}
			p := gc.Prog(asm)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			var asm obj.As
			if v.Op == ssa.OpAMD64SUBQconst {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x
			p.From.Offset = -v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	case ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ANDWconst, ssa.OpAMD64ANDBconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64ORWconst, ssa.OpAMD64ORBconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64XORWconst, ssa.OpAMD64XORBconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHLWconst,
		ssa.OpAMD64SHLBconst, ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst,
		ssa.OpAMD64SHRBconst, ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst,
		ssa.OpAMD64SARBconst, ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst,
		ssa.OpAMD64ROLBconst:
		// This code compensates for the fact that the register allocator
		// doesn't understand 2-address instructions yet. TODO: fix that.
		x := gc.SSARegNum(v.Args[0])
		r := gc.SSARegNum(v)
		if x != r {
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		p := gc.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64LEAQ:
		p := gc.Prog(x86.ALEAQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// so we must account for that right here.
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]))
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
	case ssa.OpAMD64MOVBconst, ssa.OpAMD64MOVWconst, ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 8
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 4
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 2
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
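		// SP cannot be encoded as an index register; with scale 1 the base
		// and index are interchangeable, so swap them if the index is SP.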
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64DUFFZERO:
		p := gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Unimplementedf("MOVOconst can only do constant=0")
		}
		r := gc.SSARegNum(v)
		opregreg(x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
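		// A copy of the memory pseudo-value needs no code;
		// register copies use a move instruction sized by type.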
		if v.Type.IsMemory() {
			return
		}
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v)
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		n, off := gc.AutoVar(v.Args[0])
		p.From.Type = obj.TYPE_MEM
		p.From.Node = n
		p.From.Sym = gc.Linksym(n.Sym)
		p.From.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.From.Name = obj.NAME_PARAM
			p.From.Offset += n.Xoffset
		} else {
			p.From.Name = obj.NAME_AUTO
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		n, off := gc.AutoVar(v)
		p.To.Type = obj.TYPE_MEM
		p.To.Node = n
		p.To.Sym = gc.Linksym(n.Sym)
		p.To.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.To.Name = obj.NAME_PARAM
			p.To.Offset += n.Xoffset
		} else {
			p.To.Name = obj.NAME_AUTO
		}
	case ssa.OpPhi:
		// just check to make sure regalloc and stackalloc did it right
		if v.Type.IsMemory() {
			return
		}
		f := v.Block.Func
		loc := f.RegAlloc[v.ID]
		for _, a := range v.Args {
			if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead?
				v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func)
			}
		}
	case ssa.OpInitMem:
		// memory arg needs no code
	case ssa.OpArg:
		// input args need no code
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Output is hardwired to DX only,
		// and DX contains the closure pointer on
		// closure entry, and this "instruction"
		// is scheduled to the very beginning
		// of the entry block.
	case ssa.OpAMD64LoweredGetG:
		r := gc.SSARegNum(v)
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB:
		x := gc.SSARegNum(v.Args[0])
		r := gc.SSARegNum(v)
		if x != r {
			p := gc.Prog(moveByType(v.Type))
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
		ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
		ssa.OpAMD64SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpSP, ssa.OpSB:
		// nothing to do
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64SETNEF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.AMOVSQ)
	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))
	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))
	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))
	case ssa.OpAMD64LoweredNilCheck:
		// Optimization - if the subsequent block has a load or store
		// at the same address, we don't need to issue this instruction.
		mem := v.Args[1]
		for _, w := range v.Block.Succs[0].Values {
			if w.Op == ssa.OpPhi {
				if w.Type.IsMemory() {
					mem = w
				}
				continue
			}
			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
				// w doesn't use a store - can't be a memory op.
				continue
			}
			if w.Args[len(w.Args)-1] != mem {
				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
			}
			switch w.Op {
			case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload,
				ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
				ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
				ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
				ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore:
				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
				off := ssa.ValAndOff(v.AuxInt).Off()
				if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			}
			if w.Type.IsMemory() {
				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
					// these ops are OK
					mem = w
					continue
				}
				// We can't delay the nil check past the next store.
				break
			}
		}
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
			gc.Warnl(v.Line, "generated nil check")
		}
	default:
		v.Unimplementedf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{x86.AJNE, 1}, {x86.AJPS, 1}}, // next == b.Succs[0]
	{{x86.AJNE, 1}, {x86.AJPC, 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{x86.AJNE, 0}, {x86.AJPC, 1}}, // next == b.Succs[0]
	{{x86.AJNE, 0}, {x86.AJPS, 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	s.SetLineno(b.Line)

	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
		if b.Succs[0] != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := gc.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = gc.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{p, b.Succs[1]})
		if b.Succs[0] != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
		}
	case ssa.BlockExit:
		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		gc.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := gc.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))

	case ssa.BlockAMD64EQF:
		gc.SSAGenFPJump(s, b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		gc.SSAGenFPJump(s, b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		likely := b.Likely
		var p *obj.Prog
		switch next {
		case b.Succs[0]:
			p = gc.Prog(jmp.invasm)
			likely *= -1
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[1]})
		case b.Succs[1]:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
		default:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
			q := gc.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{q, b.Succs[1]})
		}

		// liblink reorders the instruction stream as it sees fit.
		// Pass along what we know so liblink can make use of it.
		// TODO: Once we've fully switched to SSA,
		// make liblink leave our output alone.
		switch likely {
		case ssa.BranchUnlikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
		case ssa.BranchLikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 1
		}

	default:
		b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}