github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"github.com/gagliardetto/golang-go/cmd/compile/internal/gc"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/logopt"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/ssa"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/types"
	"github.com/gagliardetto/golang-go/cmd/internal/obj"
	"github.com/gagliardetto/golang-go/cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// memIdx fills out a as an indexed memory reference for v.
// It assumes that the base register and the index register
// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
// The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
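// For example (illustrative): for a MOVQloadidx8 whose base register is AX and
// whose index register is CX, memIdx produces the operand (AX)(CX*8); any
// constant displacement carried in v.Aux/v.AuxInt is added afterwards by gc.AddAux.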
func memIdx(a *obj.Addr, v *ssa.Value) {
	r, i := v.Args[0].Reg(), v.Args[1].Reg()
	a.Type = obj.TYPE_MEM
	a.Scale = v.Op.Scale()
	if a.Scale == 1 && i == x86.REG_SP {
		r, i = i, r
	}
	a.Reg = r
	a.Index = i
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
// see runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64VFMADD231SD:
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
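		// Illustrative emitted sequence (assuming a 64-bit divide with the divisor in CX):
		//	XORL DX, DX
		//	DIVQ CX
		// leaving the quotient in AX and the remainder in DX.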
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := s.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()
		var j1 *obj.Prog

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		if ssa.NeedsFixUp(v) {
			var c *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				c = s.Prog(x86.ACMPQ)
			case ssa.OpAMD64DIVL:
				c = s.Prog(x86.ACMPL)
			case ssa.OpAMD64DIVW:
				c = s.Prog(x86.ACMPW)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = r
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1
			j1 = s.Prog(x86.AJEQ)
			j1.To.Type = obj.TYPE_BRANCH
		}

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		if j1 != nil {
			// Skip over -1 fixup code.
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			// Issue -1 fixup code.
			// n / -1 = -n
			var n1 *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				n1 = s.Prog(x86.ANEGQ)
			case ssa.OpAMD64DIVL:
				n1 = s.Prog(x86.ANEGL)
			case ssa.OpAMD64DIVW:
				n1 = s.Prog(x86.ANEGW)
			}
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX

			// n % -1 == 0
			n2 := s.Prog(x86.AXORL)
			n2.From.Type = obj.TYPE_REG
			n2.From.Reg = x86.REG_DX
			n2.To.Type = obj.TYPE_REG
			n2.To.Reg = x86.REG_DX

			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, no point in zeroing the remainder.
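			// Illustrative overall shape of the emitted code for the 64-bit
			// case with the divisor in CX:
			//	CMPQ  CX, $-1
			//	JEQ   fixup
			//	CQO
			//	IDIVQ CX
			//	JMP   done
			// fixup:
			//	NEGQ  AX
			//	XORL  DX, DX
			// done: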

			j1.To.Val = n1
			j2.To.Val = s.Pc()
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
		r := v.Reg0()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		switch r {
		case r0:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			v.Fatalf("output not in same register as an input %s", v.LongString())
		}

	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			switch v.AuxInt {
			case 1:
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc.
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes a binary a little smaller.
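				// Size comparison for a 64-bit register operand (illustrative):
				//	INCQ AX     encodes in 3 bytes (48 FF C0)
				//	ADDQ $1, AX encodes in 4 bytes (48 83 C0 01)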
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case -1:
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case 0x80:
				// 'SUBQ $-0x80, r' is shorter to encode than
				// and functionally equivalent to 'ADDQ $0x80, r'.
				asm := x86.ASUBL
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ASUBQ
				}
				p := s.Prog(asm)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = -0x80
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return

			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		// Flag condition: ^ZERO || PARITY
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
		} else {
			q = s.Prog(x86.ACMOVWPS)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r

	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}

		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,AX
		//   CMOV*NE  DST,AX
		//   CMOV*PC  AX,DST
		//
		// TODO(rasky): we could generate:
		//   CMOV*NE  DST,SRC
		//   CMOV*PC  SRC,DST
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		if v.Args[1].Reg() != x86.REG_AX {
			opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
		} else {
			q = s.Prog(x86.ACMOVWPC)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = x86.REG_AX
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		o := v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = o
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
			switch v.Op {
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			}
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = o
			p.To.Type = obj.TYPE_REG
			p.To.Reg = o
		}
		gc.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
		op := v.Op
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version because it's shorter
			op = ssa.OpAMD64BTLconst
		}
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.From, v, sc.Off())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
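			// Size comparison (illustrative): MOVQ $1, AX takes 7 bytes,
			// while MOVL $1, AX takes 5 bytes and leaves the same value
			// in the full 64-bit register.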
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		memIdx(&p.To, v)
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		if val == 1 || val == -1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmodify {
				if val == 1 {
					asm = x86.AINCQ
				} else {
					asm = x86.ADECQ
				}
			} else {
				if val == 1 {
					asm = x86.AINCL
				} else {
					asm = x86.ADECL
				}
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
		ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, off)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		memIdx(&p.To, v)
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		var p *obj.Prog
		switch v.Op {
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		}
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(s, x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)

	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if gc.Widthptr == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(int64(2 * gc.Widthptr)) // space used in callee args area by assembly stubs

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64NEGLflags:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
			p.To.Reg = v.Reg0()
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
			p.To.Reg = v.Reg()
		}
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val != 0 && val != 1 && val != 2 && val != 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT on Intel has a false dependency on the destination register.
			// Xor register with itself to break the dependency.
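			// Illustrative emitted sequence (destination in CX, source in AX):
			//	XORQ    CX, CX
			//	POPCNTQ AX, CX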
			p := s.Prog(x86.AXORQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		ssa.OpAMD64SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
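		// Illustrative emitted check (pointer in BX):
		//	TESTB AX, (BX)
		// which faults if BX is nil.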
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p.To.Offset += 4
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.ARET)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockAMD64EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}