github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
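// When size is not a multiple of a full 4-MOVUPS block, the returned offset
// enters a block partway through, so only its trailing MOVs execute; the
// returned adjust is then negative and is added to DI beforehand so the fixed
// store offsets used by those MOVs still fall within the requested region.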
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzAddSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := s.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var c *obj.Prog
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			c = s.Prog(x86.ACMPQ)
		case ssa.OpAMD64DIVL:
			c = s.Prog(x86.ACMPL)
		case ssa.OpAMD64DIVW:
			c = s.Prog(x86.ACMPW)
		}
		c.From.Type = obj.TYPE_REG
		c.From.Reg = r
		c.To.Type = obj.TYPE_CONST
		c.To.Offset = -1
		j1 := s.Prog(x86.AJEQ)
		j1.To.Type = obj.TYPE_BRANCH

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
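		// The divide consumes the sign-extended dividend in DX:AX set up above,
		// leaving the quotient in AX and the remainder in DX.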
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		// Skip over -1 fixup code.
		j2 := s.Prog(obj.AJMP)
		j2.To.Type = obj.TYPE_BRANCH

		// Issue -1 fixup code.
		// n / -1 = -n
		n1 := s.Prog(x86.ANEGQ)
		n1.To.Type = obj.TYPE_REG
		n1.To.Reg = x86.REG_AX

		// n % -1 == 0
		n2 := s.Prog(x86.AXORL)
		n2.From.Type = obj.TYPE_REG
		n2.From.Reg = x86.REG_DX
		n2.To.Type = obj.TYPE_REG
		n2.To.Reg = x86.REG_DX

		// TODO(khr): issue only the -1 fixup code we need.
		// For instance, if only the quotient is used, no point in zeroing the remainder.

		j1.To.Val = n1
		j2.To.Val = s.Pc()

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add; the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc.
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
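		// The convert instructions only write the low part of the destination
		// XMM register, so XOR it first; otherwise the result would carry a
		// dependency on the register's stale contents.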
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.AADDQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = adj
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(s, x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
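		// Fast path: load g directly through the TLS pseudo-register.
		// Slow path: materialize the TLS base into r, then load through it.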
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT on Intel has a false dependency on the destination register.
			// Zero the destination to break the dependency.
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockAMD64EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}