github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// Smallest possible faulting page at address zero.
const minZeroPage = 4096

// ssaRegToReg maps ssa register numbers to obj register numbers.
var ssaRegToReg = []int16{
	x86.REG_AX,
	x86.REG_CX,
	x86.REG_DX,
	x86.REG_BX,
	x86.REG_SP,
	x86.REG_BP,
	x86.REG_SI,
	x86.REG_DI,
	x86.REG_R8,
	x86.REG_R9,
	x86.REG_R10,
	x86.REG_R11,
	x86.REG_R12,
	x86.REG_R13,
	x86.REG_R14,
	x86.REG_R15,
	x86.REG_X0,
	x86.REG_X1,
	x86.REG_X2,
	x86.REG_X3,
	x86.REG_X4,
	x86.REG_X5,
	x86.REG_X6,
	x86.REG_X7,
	x86.REG_X8,
	x86.REG_X9,
	x86.REG_X10,
	x86.REG_X11,
	x86.REG_X12,
	x86.REG_X13,
	x86.REG_X14,
	x86.REG_X15,
	0, // SB isn't a real register. We fill an Addr.Reg field with 0 in this case.
}

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
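// For example (an illustration, not a call that appears elsewhere in this file),
// opregreg(x86.AADDQ, x86.REG_AX, x86.REG_BX) would emit ADDQ BX, AX, i.e. AX += BX.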
func opregreg(op obj.As, dest, src int16) *obj.Prog {
	p := gc.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzAddSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetLineno(v.Line)
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := gc.SSARegNum(v.Args[1])

		// Zero extend dividend.
		c := gc.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
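		// The sequence emitted below is, roughly (a summary, not emitted verbatim):
		//	CMPQ	divisor, $-1
		//	JEQ	fixup
		//	<sign-extend AX into DX>
		//	IDIV	divisor
		//	JMP	done
		// fixup:
		//	NEGQ	AX
		//	XORL	DX, DX
		// done: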
		r := gc.SSARegNum(v.Args[1])

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var c *obj.Prog
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			c = gc.Prog(x86.ACMPQ)
		case ssa.OpAMD64DIVL:
			c = gc.Prog(x86.ACMPL)
		case ssa.OpAMD64DIVW:
			c = gc.Prog(x86.ACMPW)
		}
		c.From.Type = obj.TYPE_REG
		c.From.Reg = r
		c.To.Type = obj.TYPE_CONST
		c.To.Offset = -1
		j1 := gc.Prog(x86.AJEQ)
		j1.To.Type = obj.TYPE_BRANCH

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			gc.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			gc.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			gc.Prog(x86.ACWD)
		}

		// Issue divide.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		// Skip over -1 fixup code.
		j2 := gc.Prog(obj.AJMP)
		j2.To.Type = obj.TYPE_BRANCH

		// Issue -1 fixup code.
		// n / -1 = -n
		n1 := gc.Prog(x86.ANEGQ)
		n1.To.Type = obj.TYPE_REG
		n1.To.Reg = x86.REG_AX

		// n % -1 == 0
		n2 := gc.Prog(x86.AXORL)
		n2.From.Type = obj.TYPE_REG
		n2.From.Reg = x86.REG_DX
		n2.To.Type = obj.TYPE_REG
		n2.To.Reg = x86.REG_DX

		// TODO(khr): issue only the -1 fixup code we need.
		// For instance, if only the quotient is used, no point in zeroing the remainder.

		j1.To.Val = n1
		j2.To.Val = s.Pc()

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p = gc.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := gc.SSARegNum(v)
		a := gc.SSARegNum(v.Args[0])
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and make a binary a little smaller.
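				// (The same reasoning leads to DEC for AuxInt == -1 below;
				// other constants fall through to ADD or LEA.)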
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := gc.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		p := gc.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]))
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore,
		ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
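		// Args[0] is the address, Args[1] is the value being stored;
		// Aux/AuxInt describe the symbol and offset folded into the address.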
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 8
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 4
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 2
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0]))
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = gc.Prog(x86.AADDQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = adj
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Unimplementedf("MOVOconst can only do constant=0")
		}
		r := gc.SSARegNum(v)
		opregreg(x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v)
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		n, off := gc.AutoVar(v.Args[0])
		p.From.Type = obj.TYPE_MEM
		p.From.Node = n
		p.From.Sym = gc.Linksym(n.Sym)
		p.From.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.From.Name = obj.NAME_PARAM
			p.From.Offset += n.Xoffset
		} else {
			p.From.Name = obj.NAME_AUTO
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		n, off := gc.AutoVar(v)
		p.To.Type = obj.TYPE_MEM
		p.To.Node = n
		p.To.Sym = gc.Linksym(n.Sym)
		p.To.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.To.Name = obj.NAME_PARAM
			p.To.Offset += n.Xoffset
		} else {
			p.To.Name = obj.NAME_AUTO
		}
	case ssa.OpPhi:
		gc.CheckLoweredPhi(v)
	case ssa.OpInitMem:
		// memory arg needs no code
	case ssa.OpArg:
		// input args need no code
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := gc.SSARegNum(v)
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum0(v)
	case ssa.OpAMD64SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
	case ssa.OpSP, ssa.OpSB:
		// nothing to do
	case ssa.OpSelect0, ssa.OpSelect1:
		// nothing to do
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64SETNEF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		gc.Prog(x86.AREP)
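		// The REP prefix applies to the MOVSQ emitted next:
		// together they copy CX quadwords from (SI) to (DI).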
		gc.Prog(x86.AMOVSQ)
	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))
	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))
	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))
	case ssa.OpKeepAlive:
		if !v.Args[0].Type.IsPtrShaped() {
			v.Fatalf("keeping non-pointer alive %v", v.Args[0])
		}
		n, off := gc.AutoVar(v.Args[0])
		if n == nil {
			v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0])
		}
		if off != 0 {
			v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off)
		}
		gc.Gvarlive(n)
	case ssa.OpAMD64LoweredNilCheck:
		// Optimization - if the subsequent block has a load or store
		// at the same address, we don't need to issue this instruction.
		mem := v.Args[1]
		for _, w := range v.Block.Succs[0].Block().Values {
			if w.Op == ssa.OpPhi {
				if w.Type.IsMemory() {
					mem = w
				}
				continue
			}
			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
				// w doesn't use a store - can't be a memory op.
				continue
			}
			if w.Args[len(w.Args)-1] != mem {
				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
			}
			switch w.Op {
			case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload,
				ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
				ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
				ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
				ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore,
				ssa.OpAMD64MOVQatomicload, ssa.OpAMD64MOVLatomicload,
				ssa.OpAMD64CMPXCHGQlock, ssa.OpAMD64CMPXCHGLlock,
				ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ, ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
				if w.Args[1] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
				off := ssa.ValAndOff(v.AuxInt).Off()
				if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			}
			if w.Type.IsMemory() || w.Type.IsTuple() && w.Type.FieldType(1).IsMemory() {
				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
					// these ops are OK
					mem = w
					continue
				}
				// We can't delay the nil check past the next store.
				break
			}
		}
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
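		// The TESTB emitted below reads one byte at the checked address; the
		// result is discarded, so its only effect is the fault taken when the
		// pointer is nil.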
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
			gc.Warnl(v.Line, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum0(v)
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := gc.SSARegNum0(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := gc.SSARegNum0(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		gc.Prog(x86.ALOCK)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if gc.SSARegNum(v.Args[1]) != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		gc.Prog(x86.ALOCK)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
		p = gc.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum0(v)
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		gc.Prog(x86.ALOCK)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
	default:
		v.Unimplementedf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	s.SetLineno(b.Line)

	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
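			// The jump target's Prog isn't known yet; record the branch so it
			// can be patched once all blocks have been emitted.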
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := gc.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = gc.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		gc.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := gc.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))

	case ssa.BlockAMD64EQF:
		gc.SSAGenFPJump(s, b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		gc.SSAGenFPJump(s, b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		likely := b.Likely
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = gc.Prog(jmp.invasm)
			likely *= -1
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := gc.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

		// liblink reorders the instruction stream as it sees fit.
		// Pass along what we know so liblink can make use of it.
		// TODO: Once we've fully switched to SSA,
		// make liblink leave our output alone.
		switch likely {
		case ssa.BranchUnlikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
		case ssa.BranchLikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 1
		}

	default:
		b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}