github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
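// For example, a block of 3*dzClearStep bytes needs steps = 3 and blocks = 0,
// so duff returns off = dzBlocks*dzBlockSize - dzLeaqSize - 3*dzMovSize and
// adj = -dzClearStep*(dzBlockLen-3).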
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := s.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()
		var j1 *obj.Prog

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
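		// (For instance, math.MinInt64 / -1 would trap in hardware, while Go
		// defines the result as math.MinInt64 with remainder 0, which is exactly
		// what the NEG/XOR fixup emitted below produces.)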
		if ssa.NeedsFixUp(v) {
			var c *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				c = s.Prog(x86.ACMPQ)
			case ssa.OpAMD64DIVL:
				c = s.Prog(x86.ACMPL)
			case ssa.OpAMD64DIVW:
				c = s.Prog(x86.ACMPW)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = r
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1
			j1 = s.Prog(x86.AJEQ)
			j1.To.Type = obj.TYPE_BRANCH
		}

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		if j1 != nil {
			// Skip over -1 fixup code.
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			// Issue -1 fixup code.
			// n / -1 = -n
			var n1 *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				n1 = s.Prog(x86.ANEGQ)
			case ssa.OpAMD64DIVL:
				n1 = s.Prog(x86.ANEGL)
			case ssa.OpAMD64DIVW:
				n1 = s.Prog(x86.ANEGW)
			}
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX

			// n % -1 == 0
			n2 := s.Prog(x86.AXORL)
			n2.From.Type = obj.TYPE_REG
			n2.From.Reg = x86.REG_DX
			n2.To.Type = obj.TYPE_REG
			n2.To.Reg = x86.REG_DX

			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, no point in zeroing the remainder.

			j1.To.Val = n1
			j2.To.Val = s.Pc()
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in Dx, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
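		// For example, with x = y = 1<<63 the ADDQ leaves 0 in the register with
		// the carry set, and RCRQ $1 pulls that carry back into bit 63, yielding
		// 1<<63, the exact unsigned average.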
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
		r := v.Reg0()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		switch r {
		case r0:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			v.Fatalf("output not in same register as an input %s", v.LongString())
		}

	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		// Flag condition: ^ZERO || PARITY
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
		} else {
			q = s.Prog(x86.ACMOVWPS)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r

	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}

		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,AX
		//   CMOV*NE  DST,AX
		//   CMOV*PC  AX,DST
		//
		// TODO(rasky): we could generate:
		//   CMOV*NE  DST,SRC
		//   CMOV*PC  SRC,DST
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		if v.Args[1].Reg() != x86.REG_AX {
			opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
		} else {
			q = s.Prog(x86.ACMOVWPC)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = x86.REG_AX
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		o := v.Reg()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		switch v.Op {
		case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAL1, ssa.OpAMD64LEAW1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAW2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAW4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8, ssa.OpAMD64LEAL8, ssa.OpAMD64LEAW8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		p.To.Type = obj.TYPE_REG
		p.To.Reg = o
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
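			// (For instance, LEAQ1 with a nonzero AuxInt becomes
			// LEAQ (r)(i*1), o followed by LEAQ AuxInt(o), o.)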
			switch v.Op {
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			}
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = o
			p.To.Type = obj.TYPE_REG
			p.To.Reg = o
		}
		gc.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
		op := v.Op
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version because it's shorter
			op = ssa.OpAMD64BTLconst
		}
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.From, v, sc.Off())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8:
			p.From.Scale = 8
		case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.OpAMD64MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8:
			p.To.Scale = 8
		case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		if val == 1 || val == -1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmodify {
				if val == 1 {
					asm = x86.AINCQ
				} else {
					asm = x86.ADECQ
				}
			} else {
				if val == 1 {
					asm = x86.AINCL
				} else {
					asm = x86.ADECL
				}
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
		ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, off)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
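		// (CVTSI2Sx writes only the low lanes of its XMM destination, so without
		// this clearing the conversion would carry a false dependency on the
		// register's previous contents.)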
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		var p *obj.Prog
		switch v.Op {
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		}
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(s, x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)

	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if gc.Widthptr == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64NEGLflags:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
			p.To.Reg = v.Reg0()
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
			p.To.Reg = v.Reg()
		}
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val != 0 && val != 1 && val != 2 && val != 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT on Intel has a false dependency on the destination register.
			// Xor register with itself to break the dependency.
			p := s.Prog(x86.AXORQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		ssa.OpAMD64SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p.To.Offset += 4
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.ARET)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockAMD64EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}