// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
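//
// For illustration (assuming the dz* constants defined in ggen.go in this
// package, where each MOVUPS clears dzClearStep = 16 bytes and a block is
// dzBlockLen = 4 clears): duff(64) gives steps = 4, blocks = 1, and a
// remaining step count of 0, so the returned offset is
// dzBlockSize*(dzBlocks-1) with no pointer adjustment (adj = 0).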
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := s.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()
		var j1 *obj.Prog

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
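		// For DIVQ the emitted skeleton looks roughly like this (a sketch in
		// Go assembler syntax; "divisor" stands for register r below):
		//	CMPQ  divisor, $-1
		//	JEQ   fixup
		//	CQO
		//	IDIVQ divisor
		//	JMP   done
		// fixup:
		//	NEGQ  AX      // quotient: n / -1 == -n
		//	XORL  DX, DX  // remainder: n % -1 == 0
		// done: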
		if ssa.NeedsFixUp(v) {
			var c *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				c = s.Prog(x86.ACMPQ)
			case ssa.OpAMD64DIVL:
				c = s.Prog(x86.ACMPL)
			case ssa.OpAMD64DIVW:
				c = s.Prog(x86.ACMPW)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = r
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1
			j1 = s.Prog(x86.AJEQ)
			j1.To.Type = obj.TYPE_BRANCH
		}

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		if j1 != nil {
			// Skip over -1 fixup code.
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			// Issue -1 fixup code.
			// n / -1 = -n
			var n1 *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				n1 = s.Prog(x86.ANEGQ)
			case ssa.OpAMD64DIVL:
				n1 = s.Prog(x86.ANEGL)
			case ssa.OpAMD64DIVW:
				n1 = s.Prog(x86.ANEGW)
			}
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX

			// n % -1 == 0
			n2 := s.Prog(x86.AXORL)
			n2.From.Type = obj.TYPE_REG
			n2.From.Reg = x86.REG_DX
			n2.To.Type = obj.TYPE_REG
			n2.To.Reg = x86.REG_DX

			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, no point in zeroing the remainder.

			j1.To.Val = n1
			j2.To.Val = s.Pc()
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in Dx, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
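		// For example (a sanity check, not from the original source): with
		// x = y = 1<<63, the ADDQ below produces 0 and sets the carry flag;
		// RCRQ $1 then rotates the carry into bit 63, giving 1<<63, the
		// correct unsigned average.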
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and make a binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		// Flag condition: ^ZERO || PARITY
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
		} else {
			q = s.Prog(x86.ACMOVWPS)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r

	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}

		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,AX
		//   CMOV*NE  DST,AX
		//   CMOV*PC  AX,DST
		//
		// TODO(rasky): we could generate:
		//   CMOV*NE  DST,SRC
		//   CMOV*PC  SRC,DST
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		if v.Args[1].Reg() != x86.REG_AX {
			opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
		} else {
			q = s.Prog(x86.ACMOVWPC)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = x86.REG_AX
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		o := v.Reg()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		switch v.Op {
		case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAL1, ssa.OpAMD64LEAW1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAW2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAW4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8, ssa.OpAMD64LEAL8, ssa.OpAMD64LEAW8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		p.To.Type = obj.TYPE_REG
		p.To.Reg = o
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
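			// For example (illustrative only): a LEAQ1 with displacement d
			// becomes
			//	LEAQ (r)(i*1), o
			//	LEAQ d(o), o
			// instead of the slower three-operand form LEAQ d(r)(i*1), o.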
			switch v.Op {
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			}
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = o
			p.To.Type = obj.TYPE_REG
			p.To.Reg = o
		}
		gc.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
		op := v.Op
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version because it's shorter
			op = ssa.OpAMD64BTLconst
		}
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.From, v, sc.Off())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32bit are zeroed automatically when using MOVL.
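			// (Writes to a 32-bit register on amd64 zero-extend into the full
			// 64-bit register, so MOVL $c, reg yields the same 64-bit value
			// as MOVQ $c, reg with a shorter encoding.)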
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8:
			p.From.Scale = 8
		case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.OpAMD64MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8:
			p.To.Scale = 8
		case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		if val == 1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmodify {
				asm = x86.AINCQ
			} else {
				asm = x86.AINCL
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
		ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, off)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
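		// (CVTSI2SS/CVTSI2SD write only the low lanes of the XMM register,
		// so without the XORPS below the conversion would depend on the
		// register's previous contents; XORPS r, r is a recognized zeroing
		// idiom that breaks that dependency.)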
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		var p *obj.Prog
		switch v.Op {
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		}
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(s, x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)

	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if gc.Widthptr == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
			p.To.Reg = v.Reg0()
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
			p.To.Reg = v.Reg()
		}
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val != 0 && val != 1 && val != 2 && val != 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT on Intel has a false dependency on the destination register.
			// Xor register with itself to break the dependency.
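			// (Schematically the sequence emitted here is
			//	XORQ    dst, dst
			//	POPCNT* src, dst
			// which is why it is only done when dst differs from src.)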
			p := s.Prog(x86.AXORQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		ssa.OpAMD64SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
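		// (If the pointer is nil, the TESTB below faults; the runtime's
		// signal handler turns that fault into the usual nil-pointer panic.)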
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p.To.Offset += 4
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.ARET)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockAMD64EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}