github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/amd64/ssa.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package amd64 6 7 import ( 8 "fmt" 9 "github.com/bir3/gocompiler/src/internal/buildcfg" 10 "math" 11 12 "github.com/bir3/gocompiler/src/cmd/compile/internal/base" 13 "github.com/bir3/gocompiler/src/cmd/compile/internal/ir" 14 "github.com/bir3/gocompiler/src/cmd/compile/internal/logopt" 15 "github.com/bir3/gocompiler/src/cmd/compile/internal/objw" 16 "github.com/bir3/gocompiler/src/cmd/compile/internal/ssa" 17 "github.com/bir3/gocompiler/src/cmd/compile/internal/ssagen" 18 "github.com/bir3/gocompiler/src/cmd/compile/internal/types" 19 "github.com/bir3/gocompiler/src/cmd/internal/obj" 20 "github.com/bir3/gocompiler/src/cmd/internal/obj/x86" 21 ) 22 23 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags. 24 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) { 25 flive := b.FlagsLiveAtEnd 26 for _, c := range b.ControlValues() { 27 flive = c.Type.IsFlags() || flive 28 } 29 for i := len(b.Values) - 1; i >= 0; i-- { 30 v := b.Values[i] 31 if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) { 32 // The "mark" is any non-nil Aux value. 33 v.Aux = ssa.AuxMark 34 } 35 if v.Type.IsFlags() { 36 flive = false 37 } 38 for _, a := range v.Args { 39 if a.Type.IsFlags() { 40 flive = true 41 } 42 } 43 } 44 } 45 46 // loadByType returns the load instruction of the given type. 47 func loadByType(t *types.Type) obj.As { 48 // Avoid partial register write 49 if !t.IsFloat() { 50 switch t.Size() { 51 case 1: 52 return x86.AMOVBLZX 53 case 2: 54 return x86.AMOVWLZX 55 } 56 } 57 // Otherwise, there's no difference between load and store opcodes. 58 return storeByType(t) 59 } 60 61 // storeByType returns the store instruction of the given type. 
62 func storeByType(t *types.Type) obj.As { 63 width := t.Size() 64 if t.IsFloat() { 65 switch width { 66 case 4: 67 return x86.AMOVSS 68 case 8: 69 return x86.AMOVSD 70 } 71 } else { 72 switch width { 73 case 1: 74 return x86.AMOVB 75 case 2: 76 return x86.AMOVW 77 case 4: 78 return x86.AMOVL 79 case 8: 80 return x86.AMOVQ 81 case 16: 82 return x86.AMOVUPS 83 } 84 } 85 panic(fmt.Sprintf("bad store type %v", t)) 86 } 87 88 // moveByType returns the reg->reg move instruction of the given type. 89 func moveByType(t *types.Type) obj.As { 90 if t.IsFloat() { 91 // Moving the whole sse2 register is faster 92 // than moving just the correct low portion of it. 93 // There is no xmm->xmm move with 1 byte opcode, 94 // so use movups, which has 2 byte opcode. 95 return x86.AMOVUPS 96 } else { 97 switch t.Size() { 98 case 1: 99 // Avoids partial register write 100 return x86.AMOVL 101 case 2: 102 return x86.AMOVL 103 case 4: 104 return x86.AMOVL 105 case 8: 106 return x86.AMOVQ 107 case 16: 108 return x86.AMOVUPS // int128s are in SSE registers 109 default: 110 panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t)) 111 } 112 } 113 } 114 115 // opregreg emits instructions for 116 // 117 // dest := dest(To) op src(From) 118 // 119 // and also returns the created obj.Prog so it 120 // may be further adjusted (offset, scale, etc). 121 func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog { 122 p := s.Prog(op) 123 p.From.Type = obj.TYPE_REG 124 p.To.Type = obj.TYPE_REG 125 p.To.Reg = dest 126 p.From.Reg = src 127 return p 128 } 129 130 // memIdx fills out a as an indexed memory reference for v. 131 // It assumes that the base register and the index register 132 // are v.Args[0].Reg() and v.Args[1].Reg(), respectively. 133 // The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary. 
134 func memIdx(a *obj.Addr, v *ssa.Value) { 135 r, i := v.Args[0].Reg(), v.Args[1].Reg() 136 a.Type = obj.TYPE_MEM 137 a.Scale = v.Op.Scale() 138 if a.Scale == 1 && i == x86.REG_SP { 139 r, i = i, r 140 } 141 a.Reg = r 142 a.Index = i 143 } 144 145 // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ, 146 // See runtime/mkduff.go. 147 func duffStart(size int64) int64 { 148 x, _ := duff(size) 149 return x 150 } 151 func duffAdj(size int64) int64 { 152 _, x := duff(size) 153 return x 154 } 155 156 // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes) 157 // required to use the duffzero mechanism for a block of the given size. 158 func duff(size int64) (int64, int64) { 159 if size < 32 || size > 1024 || size%dzClearStep != 0 { 160 panic("bad duffzero size") 161 } 162 steps := size / dzClearStep 163 blocks := steps / dzBlockLen 164 steps %= dzBlockLen 165 off := dzBlockSize * (dzBlocks - blocks) 166 var adj int64 167 if steps != 0 { 168 off -= dzLeaqSize 169 off -= dzMovSize * steps 170 adj -= dzClearStep * (dzBlockLen - steps) 171 } 172 return off, adj 173 } 174 175 func getgFromTLS(s *ssagen.State, r int16) { 176 // See the comments in cmd/internal/obj/x86/obj6.go 177 // near CanUse1InsnTLS for a detailed explanation of these instructions. 
178 if x86.CanUse1InsnTLS(base.Ctxt) { 179 // MOVQ (TLS), r 180 p := s.Prog(x86.AMOVQ) 181 p.From.Type = obj.TYPE_MEM 182 p.From.Reg = x86.REG_TLS 183 p.To.Type = obj.TYPE_REG 184 p.To.Reg = r 185 } else { 186 // MOVQ TLS, r 187 // MOVQ (r)(TLS*1), r 188 p := s.Prog(x86.AMOVQ) 189 p.From.Type = obj.TYPE_REG 190 p.From.Reg = x86.REG_TLS 191 p.To.Type = obj.TYPE_REG 192 p.To.Reg = r 193 q := s.Prog(x86.AMOVQ) 194 q.From.Type = obj.TYPE_MEM 195 q.From.Reg = r 196 q.From.Index = x86.REG_TLS 197 q.From.Scale = 1 198 q.To.Type = obj.TYPE_REG 199 q.To.Reg = r 200 } 201 } 202 203 func ssaGenValue(s *ssagen.State, v *ssa.Value) { 204 switch v.Op { 205 case ssa.OpAMD64VFMADD231SD: 206 p := s.Prog(v.Op.Asm()) 207 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()} 208 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} 209 p.AddRestSourceReg(v.Args[1].Reg()) 210 case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL: 211 r := v.Reg() 212 r1 := v.Args[0].Reg() 213 r2 := v.Args[1].Reg() 214 switch { 215 case r == r1: 216 p := s.Prog(v.Op.Asm()) 217 p.From.Type = obj.TYPE_REG 218 p.From.Reg = r2 219 p.To.Type = obj.TYPE_REG 220 p.To.Reg = r 221 case r == r2: 222 p := s.Prog(v.Op.Asm()) 223 p.From.Type = obj.TYPE_REG 224 p.From.Reg = r1 225 p.To.Type = obj.TYPE_REG 226 p.To.Reg = r 227 default: 228 var asm obj.As 229 if v.Op == ssa.OpAMD64ADDQ { 230 asm = x86.ALEAQ 231 } else { 232 asm = x86.ALEAL 233 } 234 p := s.Prog(asm) 235 p.From.Type = obj.TYPE_MEM 236 p.From.Reg = r1 237 p.From.Scale = 1 238 p.From.Index = r2 239 p.To.Type = obj.TYPE_REG 240 p.To.Reg = r 241 } 242 // 2-address opcode arithmetic 243 case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL, 244 ssa.OpAMD64MULQ, ssa.OpAMD64MULL, 245 ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, 246 ssa.OpAMD64ORQ, ssa.OpAMD64ORL, 247 ssa.OpAMD64XORQ, ssa.OpAMD64XORL, 248 ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, 249 ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB, 250 ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB, 251 ssa.OpAMD64ROLQ, 
ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB, 252 ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB, 253 ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD, 254 ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD, 255 ssa.OpAMD64MINSS, ssa.OpAMD64MINSD, 256 ssa.OpAMD64POR, ssa.OpAMD64PXOR, 257 ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ, 258 ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ, 259 ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ: 260 opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) 261 262 case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ: 263 p := s.Prog(v.Op.Asm()) 264 lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg() 265 p.From.Type = obj.TYPE_REG 266 p.From.Reg = bits 267 p.To.Type = obj.TYPE_REG 268 p.To.Reg = lo 269 p.AddRestSourceReg(hi) 270 271 case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL, 272 ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL, 273 ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL: 274 p := s.Prog(v.Op.Asm()) 275 p.From.Type = obj.TYPE_REG 276 p.From.Reg = v.Args[0].Reg() 277 p.To.Type = obj.TYPE_REG 278 switch v.Op { 279 case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL: 280 p.To.Reg = v.Reg0() 281 default: 282 p.To.Reg = v.Reg() 283 } 284 285 case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL: 286 p := s.Prog(v.Op.Asm()) 287 p.From.Type = obj.TYPE_REG 288 p.From.Reg = v.Args[0].Reg() 289 p.To.Type = obj.TYPE_REG 290 p.To.Reg = v.Reg() 291 p.AddRestSourceReg(v.Args[1].Reg()) 292 293 case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ, 294 ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ, 295 ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ: 296 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) 297 p.AddRestSourceReg(v.Args[0].Reg()) 298 299 case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload, 300 ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload, 301 ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload: 302 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) 303 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()} 304 ssagen.AddAux(&m, v) 305 p.AddRestSource(m) 306 307 case ssa.OpAMD64SHLXLloadidx1, 
ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8, 308 ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8, 309 ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8, 310 ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8, 311 ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8, 312 ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8: 313 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg()) 314 m := obj.Addr{Type: obj.TYPE_MEM} 315 memIdx(&m, v) 316 ssagen.AddAux(&m, v) 317 p.AddRestSource(m) 318 319 case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU: 320 // Arg[0] (the dividend) is in AX. 321 // Arg[1] (the divisor) can be in any other register. 322 // Result[0] (the quotient) is in AX. 323 // Result[1] (the remainder) is in DX. 324 r := v.Args[1].Reg() 325 326 // Zero extend dividend. 327 opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX) 328 329 // Issue divide. 330 p := s.Prog(v.Op.Asm()) 331 p.From.Type = obj.TYPE_REG 332 p.From.Reg = r 333 334 case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW: 335 // Arg[0] (the dividend) is in AX. 336 // Arg[1] (the divisor) can be in any other register. 337 // Result[0] (the quotient) is in AX. 338 // Result[1] (the remainder) is in DX. 339 r := v.Args[1].Reg() 340 341 var opCMP, opNEG, opSXD obj.As 342 switch v.Op { 343 case ssa.OpAMD64DIVQ: 344 opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO 345 case ssa.OpAMD64DIVL: 346 opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ 347 case ssa.OpAMD64DIVW: 348 opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD 349 } 350 351 // CPU faults upon signed overflow, which occurs when the most 352 // negative int is divided by -1. Handle divide by -1 as a special case. 353 var j1, j2 *obj.Prog 354 if ssa.DivisionNeedsFixUp(v) { 355 c := s.Prog(opCMP) 356 c.From.Type = obj.TYPE_REG 357 c.From.Reg = r 358 c.To.Type = obj.TYPE_CONST 359 c.To.Offset = -1 360 361 // Divisor is not -1, proceed with normal division. 
362 j1 = s.Prog(x86.AJNE) 363 j1.To.Type = obj.TYPE_BRANCH 364 365 // Divisor is -1, manually compute quotient and remainder via fixup code. 366 // n / -1 = -n 367 n1 := s.Prog(opNEG) 368 n1.To.Type = obj.TYPE_REG 369 n1.To.Reg = x86.REG_AX 370 371 // n % -1 == 0 372 opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX) 373 374 // TODO(khr): issue only the -1 fixup code we need. 375 // For instance, if only the quotient is used, no point in zeroing the remainder. 376 377 // Skip over normal division. 378 j2 = s.Prog(obj.AJMP) 379 j2.To.Type = obj.TYPE_BRANCH 380 } 381 382 // Sign extend dividend and perform division. 383 p := s.Prog(opSXD) 384 if j1 != nil { 385 j1.To.SetTarget(p) 386 } 387 p = s.Prog(v.Op.Asm()) 388 p.From.Type = obj.TYPE_REG 389 p.From.Reg = r 390 391 if j2 != nil { 392 j2.To.SetTarget(s.Pc()) 393 } 394 395 case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU: 396 // the frontend rewrites constant division by 8/16/32 bit integers into 397 // HMUL by a constant 398 // SSA rewrites generate the 64 bit versions 399 400 // Arg[0] is already in AX as it's the only register we allow 401 // and DX is the only output we care about (the high bits) 402 p := s.Prog(v.Op.Asm()) 403 p.From.Type = obj.TYPE_REG 404 p.From.Reg = v.Args[1].Reg() 405 406 // IMULB puts the high portion in AH instead of DL, 407 // so move it to DL for consistency 408 if v.Type.Size() == 1 { 409 m := s.Prog(x86.AMOVB) 410 m.From.Type = obj.TYPE_REG 411 m.From.Reg = x86.REG_AH 412 m.To.Type = obj.TYPE_REG 413 m.To.Reg = x86.REG_DX 414 } 415 416 case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU: 417 // Arg[0] is already in AX as it's the only register we allow 418 // results lo in AX 419 p := s.Prog(v.Op.Asm()) 420 p.From.Type = obj.TYPE_REG 421 p.From.Reg = v.Args[1].Reg() 422 423 case ssa.OpAMD64MULQU2: 424 // Arg[0] is already in AX as it's the only register we allow 425 // results hi in DX, lo in AX 426 p := s.Prog(v.Op.Asm()) 427 p.From.Type = obj.TYPE_REG 428 p.From.Reg = 
v.Args[1].Reg() 429 430 case ssa.OpAMD64DIVQU2: 431 // Arg[0], Arg[1] are already in Dx, AX, as they're the only registers we allow 432 // results q in AX, r in DX 433 p := s.Prog(v.Op.Asm()) 434 p.From.Type = obj.TYPE_REG 435 p.From.Reg = v.Args[2].Reg() 436 437 case ssa.OpAMD64AVGQU: 438 // compute (x+y)/2 unsigned. 439 // Do a 64-bit add, the overflow goes into the carry. 440 // Shift right once and pull the carry back into the 63rd bit. 441 p := s.Prog(x86.AADDQ) 442 p.From.Type = obj.TYPE_REG 443 p.To.Type = obj.TYPE_REG 444 p.To.Reg = v.Reg() 445 p.From.Reg = v.Args[1].Reg() 446 p = s.Prog(x86.ARCRQ) 447 p.From.Type = obj.TYPE_CONST 448 p.From.Offset = 1 449 p.To.Type = obj.TYPE_REG 450 p.To.Reg = v.Reg() 451 452 case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ: 453 r := v.Reg0() 454 r0 := v.Args[0].Reg() 455 r1 := v.Args[1].Reg() 456 switch r { 457 case r0: 458 p := s.Prog(v.Op.Asm()) 459 p.From.Type = obj.TYPE_REG 460 p.From.Reg = r1 461 p.To.Type = obj.TYPE_REG 462 p.To.Reg = r 463 case r1: 464 p := s.Prog(v.Op.Asm()) 465 p.From.Type = obj.TYPE_REG 466 p.From.Reg = r0 467 p.To.Type = obj.TYPE_REG 468 p.To.Reg = r 469 default: 470 v.Fatalf("output not in same register as an input %s", v.LongString()) 471 } 472 473 case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ: 474 p := s.Prog(v.Op.Asm()) 475 p.From.Type = obj.TYPE_REG 476 p.From.Reg = v.Args[1].Reg() 477 p.To.Type = obj.TYPE_REG 478 p.To.Reg = v.Reg0() 479 480 case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst: 481 p := s.Prog(v.Op.Asm()) 482 p.From.Type = obj.TYPE_CONST 483 p.From.Offset = v.AuxInt 484 p.To.Type = obj.TYPE_REG 485 p.To.Reg = v.Reg0() 486 487 case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst: 488 r := v.Reg() 489 a := v.Args[0].Reg() 490 if r == a { 491 switch v.AuxInt { 492 case 1: 493 var asm obj.As 494 // Software optimization manual recommends add $1,reg. 495 // But inc/dec is 1 byte smaller. 
ICC always uses inc 496 // Clang/GCC choose depending on flags, but prefer add. 497 // Experiments show that inc/dec is both a little faster 498 // and make a binary a little smaller. 499 if v.Op == ssa.OpAMD64ADDQconst { 500 asm = x86.AINCQ 501 } else { 502 asm = x86.AINCL 503 } 504 p := s.Prog(asm) 505 p.To.Type = obj.TYPE_REG 506 p.To.Reg = r 507 return 508 case -1: 509 var asm obj.As 510 if v.Op == ssa.OpAMD64ADDQconst { 511 asm = x86.ADECQ 512 } else { 513 asm = x86.ADECL 514 } 515 p := s.Prog(asm) 516 p.To.Type = obj.TYPE_REG 517 p.To.Reg = r 518 return 519 case 0x80: 520 // 'SUBQ $-0x80, r' is shorter to encode than 521 // and functionally equivalent to 'ADDQ $0x80, r'. 522 asm := x86.ASUBL 523 if v.Op == ssa.OpAMD64ADDQconst { 524 asm = x86.ASUBQ 525 } 526 p := s.Prog(asm) 527 p.From.Type = obj.TYPE_CONST 528 p.From.Offset = -0x80 529 p.To.Type = obj.TYPE_REG 530 p.To.Reg = r 531 return 532 533 } 534 p := s.Prog(v.Op.Asm()) 535 p.From.Type = obj.TYPE_CONST 536 p.From.Offset = v.AuxInt 537 p.To.Type = obj.TYPE_REG 538 p.To.Reg = r 539 return 540 } 541 var asm obj.As 542 if v.Op == ssa.OpAMD64ADDQconst { 543 asm = x86.ALEAQ 544 } else { 545 asm = x86.ALEAL 546 } 547 p := s.Prog(asm) 548 p.From.Type = obj.TYPE_MEM 549 p.From.Reg = a 550 p.From.Offset = v.AuxInt 551 p.To.Type = obj.TYPE_REG 552 p.To.Reg = r 553 554 case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ, 555 ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT, 556 ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE, 557 ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT, 558 ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE, 559 ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE, 560 ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI, 561 ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS, 562 ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC, 563 ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS, 564 
ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF, 565 ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF: 566 p := s.Prog(v.Op.Asm()) 567 p.From.Type = obj.TYPE_REG 568 p.From.Reg = v.Args[1].Reg() 569 p.To.Type = obj.TYPE_REG 570 p.To.Reg = v.Reg() 571 572 case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF: 573 // Flag condition: ^ZERO || PARITY 574 // Generate: 575 // CMOV*NE SRC,DST 576 // CMOV*PS SRC,DST 577 p := s.Prog(v.Op.Asm()) 578 p.From.Type = obj.TYPE_REG 579 p.From.Reg = v.Args[1].Reg() 580 p.To.Type = obj.TYPE_REG 581 p.To.Reg = v.Reg() 582 var q *obj.Prog 583 if v.Op == ssa.OpAMD64CMOVQNEF { 584 q = s.Prog(x86.ACMOVQPS) 585 } else if v.Op == ssa.OpAMD64CMOVLNEF { 586 q = s.Prog(x86.ACMOVLPS) 587 } else { 588 q = s.Prog(x86.ACMOVWPS) 589 } 590 q.From.Type = obj.TYPE_REG 591 q.From.Reg = v.Args[1].Reg() 592 q.To.Type = obj.TYPE_REG 593 q.To.Reg = v.Reg() 594 595 case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF: 596 // Flag condition: ZERO && !PARITY 597 // Generate: 598 // MOV SRC,TMP 599 // CMOV*NE DST,TMP 600 // CMOV*PC TMP,DST 601 // 602 // TODO(rasky): we could generate: 603 // CMOV*NE DST,SRC 604 // CMOV*PC SRC,DST 605 // But this requires a way for regalloc to know that SRC might be 606 // clobbered by this instruction. 
607 t := v.RegTmp() 608 opregreg(s, moveByType(v.Type), t, v.Args[1].Reg()) 609 610 p := s.Prog(v.Op.Asm()) 611 p.From.Type = obj.TYPE_REG 612 p.From.Reg = v.Reg() 613 p.To.Type = obj.TYPE_REG 614 p.To.Reg = t 615 var q *obj.Prog 616 if v.Op == ssa.OpAMD64CMOVQEQF { 617 q = s.Prog(x86.ACMOVQPC) 618 } else if v.Op == ssa.OpAMD64CMOVLEQF { 619 q = s.Prog(x86.ACMOVLPC) 620 } else { 621 q = s.Prog(x86.ACMOVWPC) 622 } 623 q.From.Type = obj.TYPE_REG 624 q.From.Reg = t 625 q.To.Type = obj.TYPE_REG 626 q.To.Reg = v.Reg() 627 628 case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst: 629 r := v.Reg() 630 p := s.Prog(v.Op.Asm()) 631 p.From.Type = obj.TYPE_CONST 632 p.From.Offset = v.AuxInt 633 p.To.Type = obj.TYPE_REG 634 p.To.Reg = r 635 p.AddRestSourceReg(v.Args[0].Reg()) 636 637 case ssa.OpAMD64ANDQconst: 638 asm := v.Op.Asm() 639 // If the constant is positive and fits into 32 bits, use ANDL. 640 // This saves a few bytes of encoding. 641 if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) { 642 asm = x86.AANDL 643 } 644 p := s.Prog(asm) 645 p.From.Type = obj.TYPE_CONST 646 p.From.Offset = v.AuxInt 647 p.To.Type = obj.TYPE_REG 648 p.To.Reg = v.Reg() 649 650 case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, 651 ssa.OpAMD64ANDLconst, 652 ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, 653 ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, 654 ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, 655 ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst, 656 ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst, 657 ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst: 658 p := s.Prog(v.Op.Asm()) 659 p.From.Type = obj.TYPE_CONST 660 p.From.Offset = v.AuxInt 661 p.To.Type = obj.TYPE_REG 662 p.To.Reg = v.Reg() 663 case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask: 664 r := v.Reg() 665 p := s.Prog(v.Op.Asm()) 666 p.From.Type = obj.TYPE_REG 667 p.From.Reg = r 668 p.To.Type = obj.TYPE_REG 669 p.To.Reg = r 670 case 
ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8, 671 ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8, 672 ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8: 673 p := s.Prog(v.Op.Asm()) 674 memIdx(&p.From, v) 675 o := v.Reg() 676 p.To.Type = obj.TYPE_REG 677 p.To.Reg = o 678 if v.AuxInt != 0 && v.Aux == nil { 679 // Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA. 680 switch v.Op { 681 case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8: 682 p = s.Prog(x86.ALEAQ) 683 case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8: 684 p = s.Prog(x86.ALEAL) 685 case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8: 686 p = s.Prog(x86.ALEAW) 687 } 688 p.From.Type = obj.TYPE_MEM 689 p.From.Reg = o 690 p.To.Type = obj.TYPE_REG 691 p.To.Reg = o 692 } 693 ssagen.AddAux(&p.From, v) 694 case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW: 695 p := s.Prog(v.Op.Asm()) 696 p.From.Type = obj.TYPE_MEM 697 p.From.Reg = v.Args[0].Reg() 698 ssagen.AddAux(&p.From, v) 699 p.To.Type = obj.TYPE_REG 700 p.To.Reg = v.Reg() 701 case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB, 702 ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB, 703 ssa.OpAMD64BTL, ssa.OpAMD64BTQ: 704 opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg()) 705 case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD: 706 // Go assembler has swapped operands for UCOMISx relative to CMP, 707 // must account for that right here. 
708 opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg()) 709 case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst: 710 p := s.Prog(v.Op.Asm()) 711 p.From.Type = obj.TYPE_REG 712 p.From.Reg = v.Args[0].Reg() 713 p.To.Type = obj.TYPE_CONST 714 p.To.Offset = v.AuxInt 715 case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst, 716 ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst, 717 ssa.OpAMD64BTSQconst, 718 ssa.OpAMD64BTCQconst, 719 ssa.OpAMD64BTRQconst: 720 op := v.Op 721 if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 { 722 // Emit 32-bit version because it's shorter 723 op = ssa.OpAMD64BTLconst 724 } 725 p := s.Prog(op.Asm()) 726 p.From.Type = obj.TYPE_CONST 727 p.From.Offset = v.AuxInt 728 p.To.Type = obj.TYPE_REG 729 p.To.Reg = v.Args[0].Reg() 730 case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload: 731 p := s.Prog(v.Op.Asm()) 732 p.From.Type = obj.TYPE_MEM 733 p.From.Reg = v.Args[0].Reg() 734 ssagen.AddAux(&p.From, v) 735 p.To.Type = obj.TYPE_REG 736 p.To.Reg = v.Args[1].Reg() 737 case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload: 738 sc := v.AuxValAndOff() 739 p := s.Prog(v.Op.Asm()) 740 p.From.Type = obj.TYPE_MEM 741 p.From.Reg = v.Args[0].Reg() 742 ssagen.AddAux2(&p.From, v, sc.Off64()) 743 p.To.Type = obj.TYPE_CONST 744 p.To.Offset = sc.Val64() 745 case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1: 746 p := s.Prog(v.Op.Asm()) 747 memIdx(&p.From, v) 748 ssagen.AddAux(&p.From, v) 749 p.To.Type = obj.TYPE_REG 750 p.To.Reg = v.Args[2].Reg() 751 case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1: 752 sc := 
v.AuxValAndOff() 753 p := s.Prog(v.Op.Asm()) 754 memIdx(&p.From, v) 755 ssagen.AddAux2(&p.From, v, sc.Off64()) 756 p.To.Type = obj.TYPE_CONST 757 p.To.Offset = sc.Val64() 758 case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst: 759 x := v.Reg() 760 761 // If flags aren't live (indicated by v.Aux == nil), 762 // then we can rewrite MOV $0, AX into XOR AX, AX. 763 if v.AuxInt == 0 && v.Aux == nil { 764 opregreg(s, x86.AXORL, x, x) 765 break 766 } 767 768 asm := v.Op.Asm() 769 // Use MOVL to move a small constant into a register 770 // when the constant is positive and fits into 32 bits. 771 if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) { 772 // The upper 32bit are zeroed automatically when using MOVL. 773 asm = x86.AMOVL 774 } 775 p := s.Prog(asm) 776 p.From.Type = obj.TYPE_CONST 777 p.From.Offset = v.AuxInt 778 p.To.Type = obj.TYPE_REG 779 p.To.Reg = x 780 case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst: 781 x := v.Reg() 782 p := s.Prog(v.Op.Asm()) 783 p.From.Type = obj.TYPE_FCONST 784 p.From.Val = math.Float64frombits(uint64(v.AuxInt)) 785 p.To.Type = obj.TYPE_REG 786 p.To.Reg = x 787 case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload, 788 ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, 789 ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload: 790 p := s.Prog(v.Op.Asm()) 791 p.From.Type = obj.TYPE_MEM 792 p.From.Reg = v.Args[0].Reg() 793 ssagen.AddAux(&p.From, v) 794 p.To.Type = obj.TYPE_REG 795 p.To.Reg = v.Reg() 796 case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1, 797 ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2, 798 ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, 
ssa.OpAMD64MOVBEQloadidx8: 799 p := s.Prog(v.Op.Asm()) 800 memIdx(&p.From, v) 801 ssagen.AddAux(&p.From, v) 802 p.To.Type = obj.TYPE_REG 803 p.To.Reg = v.Reg() 804 case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore, 805 ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify, 806 ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify, 807 ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore: 808 p := s.Prog(v.Op.Asm()) 809 p.From.Type = obj.TYPE_REG 810 p.From.Reg = v.Args[1].Reg() 811 p.To.Type = obj.TYPE_MEM 812 p.To.Reg = v.Args[0].Reg() 813 ssagen.AddAux(&p.To, v) 814 case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1, 815 ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2, 816 ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8, 817 ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8, 818 ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8, 819 ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8, 820 ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8, 821 ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, 
ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8: 822 p := s.Prog(v.Op.Asm()) 823 p.From.Type = obj.TYPE_REG 824 p.From.Reg = v.Args[2].Reg() 825 memIdx(&p.To, v) 826 ssagen.AddAux(&p.To, v) 827 case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify: 828 sc := v.AuxValAndOff() 829 off := sc.Off64() 830 val := sc.Val() 831 if val == 1 || val == -1 { 832 var asm obj.As 833 if v.Op == ssa.OpAMD64ADDQconstmodify { 834 if val == 1 { 835 asm = x86.AINCQ 836 } else { 837 asm = x86.ADECQ 838 } 839 } else { 840 if val == 1 { 841 asm = x86.AINCL 842 } else { 843 asm = x86.ADECL 844 } 845 } 846 p := s.Prog(asm) 847 p.To.Type = obj.TYPE_MEM 848 p.To.Reg = v.Args[0].Reg() 849 ssagen.AddAux2(&p.To, v, off) 850 break 851 } 852 fallthrough 853 case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify, 854 ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify, 855 ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify: 856 sc := v.AuxValAndOff() 857 off := sc.Off64() 858 val := sc.Val64() 859 p := s.Prog(v.Op.Asm()) 860 p.From.Type = obj.TYPE_CONST 861 p.From.Offset = val 862 p.To.Type = obj.TYPE_MEM 863 p.To.Reg = v.Args[0].Reg() 864 ssagen.AddAux2(&p.To, v, off) 865 866 case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: 867 p := s.Prog(v.Op.Asm()) 868 p.From.Type = obj.TYPE_CONST 869 sc := v.AuxValAndOff() 870 p.From.Offset = sc.Val64() 871 p.To.Type = obj.TYPE_MEM 872 p.To.Reg = v.Args[0].Reg() 873 ssagen.AddAux2(&p.To, v, sc.Off64()) 874 case ssa.OpAMD64MOVOstoreconst: 875 sc := v.AuxValAndOff() 876 if sc.Val() != 0 { 877 v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString()) 878 } 879 880 if s.ABI != obj.ABIInternal { 881 // zero X15 manually 882 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15) 883 } 884 p := s.Prog(v.Op.Asm()) 885 p.From.Type = obj.TYPE_REG 886 p.From.Reg = x86.REG_X15 887 p.To.Type 
= obj.TYPE_MEM 888 p.To.Reg = v.Args[0].Reg() 889 ssagen.AddAux2(&p.To, v, sc.Off64()) 890 891 case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1, 892 ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8, 893 ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8, 894 ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8, 895 ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8: 896 p := s.Prog(v.Op.Asm()) 897 p.From.Type = obj.TYPE_CONST 898 sc := v.AuxValAndOff() 899 p.From.Offset = sc.Val64() 900 switch { 901 case p.As == x86.AADDQ && p.From.Offset == 1: 902 p.As = x86.AINCQ 903 p.From.Type = obj.TYPE_NONE 904 case p.As == x86.AADDQ && p.From.Offset == -1: 905 p.As = x86.ADECQ 906 p.From.Type = obj.TYPE_NONE 907 case p.As == x86.AADDL && p.From.Offset == 1: 908 p.As = x86.AINCL 909 p.From.Type = obj.TYPE_NONE 910 case p.As == x86.AADDL && p.From.Offset == -1: 911 p.As = x86.ADECL 912 p.From.Type = obj.TYPE_NONE 913 } 914 memIdx(&p.To, v) 915 ssagen.AddAux2(&p.To, v, sc.Off64()) 916 case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX, 917 ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ, 918 ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS: 919 opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg()) 920 case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS: 921 
r := v.Reg() 922 // Break false dependency on destination register. 923 opregreg(s, x86.AXORPS, r, r) 924 opregreg(s, v.Op.Asm(), r, v.Args[0].Reg()) 925 case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i: 926 var p *obj.Prog 927 switch v.Op { 928 case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i: 929 p = s.Prog(x86.AMOVQ) 930 case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i: 931 p = s.Prog(x86.AMOVL) 932 } 933 p.From.Type = obj.TYPE_REG 934 p.From.Reg = v.Args[0].Reg() 935 p.To.Type = obj.TYPE_REG 936 p.To.Reg = v.Reg() 937 case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload, 938 ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload, 939 ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload, 940 ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload, 941 ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload: 942 p := s.Prog(v.Op.Asm()) 943 p.From.Type = obj.TYPE_MEM 944 p.From.Reg = v.Args[1].Reg() 945 ssagen.AddAux(&p.From, v) 946 p.To.Type = obj.TYPE_REG 947 p.To.Reg = v.Reg() 948 case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8, 949 ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8, 950 ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8, 951 ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8, 952 ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8, 953 ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8, 954 ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8, 955 
ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8, 956 ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8: 957 p := s.Prog(v.Op.Asm()) 958 959 r, i := v.Args[1].Reg(), v.Args[2].Reg() 960 p.From.Type = obj.TYPE_MEM 961 p.From.Scale = v.Op.Scale() 962 if p.From.Scale == 1 && i == x86.REG_SP { 963 r, i = i, r 964 } 965 p.From.Reg = r 966 p.From.Index = i 967 968 ssagen.AddAux(&p.From, v) 969 p.To.Type = obj.TYPE_REG 970 p.To.Reg = v.Reg() 971 case ssa.OpAMD64DUFFZERO: 972 if s.ABI != obj.ABIInternal { 973 // zero X15 manually 974 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15) 975 } 976 off := duffStart(v.AuxInt) 977 adj := duffAdj(v.AuxInt) 978 var p *obj.Prog 979 if adj != 0 { 980 p = s.Prog(x86.ALEAQ) 981 p.From.Type = obj.TYPE_MEM 982 p.From.Offset = adj 983 p.From.Reg = x86.REG_DI 984 p.To.Type = obj.TYPE_REG 985 p.To.Reg = x86.REG_DI 986 } 987 p = s.Prog(obj.ADUFFZERO) 988 p.To.Type = obj.TYPE_ADDR 989 p.To.Sym = ir.Syms.Duffzero 990 p.To.Offset = off 991 case ssa.OpAMD64DUFFCOPY: 992 p := s.Prog(obj.ADUFFCOPY) 993 p.To.Type = obj.TYPE_ADDR 994 p.To.Sym = ir.Syms.Duffcopy 995 if v.AuxInt%16 != 0 { 996 v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt) 997 } 998 p.To.Offset = 14 * (64 - v.AuxInt/16) 999 // 14 and 64 are magic constants. 14 is the number of bytes to encode: 1000 // MOVUPS (SI), X0 1001 // ADDQ $16, SI 1002 // MOVUPS X0, (DI) 1003 // ADDQ $16, DI 1004 // and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy. 1005 1006 case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy? 
1007 if v.Type.IsMemory() { 1008 return 1009 } 1010 x := v.Args[0].Reg() 1011 y := v.Reg() 1012 if x != y { 1013 opregreg(s, moveByType(v.Type), y, x) 1014 } 1015 case ssa.OpLoadReg: 1016 if v.Type.IsFlags() { 1017 v.Fatalf("load flags not implemented: %v", v.LongString()) 1018 return 1019 } 1020 p := s.Prog(loadByType(v.Type)) 1021 ssagen.AddrAuto(&p.From, v.Args[0]) 1022 p.To.Type = obj.TYPE_REG 1023 p.To.Reg = v.Reg() 1024 1025 case ssa.OpStoreReg: 1026 if v.Type.IsFlags() { 1027 v.Fatalf("store flags not implemented: %v", v.LongString()) 1028 return 1029 } 1030 p := s.Prog(storeByType(v.Type)) 1031 p.From.Type = obj.TYPE_REG 1032 p.From.Reg = v.Args[0].Reg() 1033 ssagen.AddrAuto(&p.To, v) 1034 case ssa.OpAMD64LoweredHasCPUFeature: 1035 p := s.Prog(x86.AMOVBLZX) 1036 p.From.Type = obj.TYPE_MEM 1037 ssagen.AddAux(&p.From, v) 1038 p.To.Type = obj.TYPE_REG 1039 p.To.Reg = v.Reg() 1040 case ssa.OpArgIntReg, ssa.OpArgFloatReg: 1041 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill 1042 // The loop only runs once. 1043 for _, ap := range v.Block.Func.RegArgs { 1044 // Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack. 1045 addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize) 1046 s.FuncInfo().AddSpill( 1047 obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)}) 1048 } 1049 v.Block.Func.RegArgs = nil 1050 ssagen.CheckArgReg(v) 1051 case ssa.OpAMD64LoweredGetClosurePtr: 1052 // Closure pointer is DX. 
1053 ssagen.CheckLoweredGetClosurePtr(v) 1054 case ssa.OpAMD64LoweredGetG: 1055 if s.ABI == obj.ABIInternal { 1056 v.Fatalf("LoweredGetG should not appear in ABIInternal") 1057 } 1058 r := v.Reg() 1059 getgFromTLS(s, r) 1060 case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail: 1061 if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal { 1062 // zeroing X15 when entering ABIInternal from ABI0 1063 if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9 1064 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15) 1065 } 1066 // set G register from TLS 1067 getgFromTLS(s, x86.REG_R14) 1068 } 1069 if v.Op == ssa.OpAMD64CALLtail { 1070 s.TailCall(v) 1071 break 1072 } 1073 s.Call(v) 1074 if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 { 1075 // zeroing X15 when entering ABIInternal from ABI0 1076 if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9 1077 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15) 1078 } 1079 // set G register from TLS 1080 getgFromTLS(s, x86.REG_R14) 1081 } 1082 case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter: 1083 s.Call(v) 1084 1085 case ssa.OpAMD64LoweredGetCallerPC: 1086 p := s.Prog(x86.AMOVQ) 1087 p.From.Type = obj.TYPE_MEM 1088 p.From.Offset = -8 // PC is stored 8 bytes below first parameter. 1089 p.From.Name = obj.NAME_PARAM 1090 p.To.Type = obj.TYPE_REG 1091 p.To.Reg = v.Reg() 1092 1093 case ssa.OpAMD64LoweredGetCallerSP: 1094 // caller's SP is the address of the first arg 1095 mov := x86.AMOVQ 1096 if types.PtrSize == 4 { 1097 mov = x86.AMOVL 1098 } 1099 p := s.Prog(mov) 1100 p.From.Type = obj.TYPE_ADDR 1101 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures 1102 p.From.Name = obj.NAME_PARAM 1103 p.To.Type = obj.TYPE_REG 1104 p.To.Reg = v.Reg() 1105 1106 case ssa.OpAMD64LoweredWB: 1107 p := s.Prog(obj.ACALL) 1108 p.To.Type = obj.TYPE_MEM 1109 p.To.Name = obj.NAME_EXTERN 1110 // AuxInt encodes how many buffer entries we need. 
1111 p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1] 1112 1113 case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC: 1114 p := s.Prog(obj.ACALL) 1115 p.To.Type = obj.TYPE_MEM 1116 p.To.Name = obj.NAME_EXTERN 1117 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt] 1118 s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs 1119 1120 case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, 1121 ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL, 1122 ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL: 1123 p := s.Prog(v.Op.Asm()) 1124 p.To.Type = obj.TYPE_REG 1125 p.To.Reg = v.Reg() 1126 1127 case ssa.OpAMD64NEGLflags: 1128 p := s.Prog(v.Op.Asm()) 1129 p.To.Type = obj.TYPE_REG 1130 p.To.Reg = v.Reg0() 1131 1132 case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS: 1133 p := s.Prog(v.Op.Asm()) 1134 p.From.Type = obj.TYPE_REG 1135 p.From.Reg = v.Args[0].Reg() 1136 p.To.Type = obj.TYPE_REG 1137 switch v.Op { 1138 case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ: 1139 p.To.Reg = v.Reg0() 1140 case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS: 1141 p.To.Reg = v.Reg() 1142 } 1143 case ssa.OpAMD64ROUNDSD: 1144 p := s.Prog(v.Op.Asm()) 1145 val := v.AuxInt 1146 // 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc 1147 if val < 0 || val > 3 { 1148 v.Fatalf("Invalid rounding mode") 1149 } 1150 p.From.Offset = val 1151 p.From.Type = obj.TYPE_CONST 1152 p.AddRestSourceReg(v.Args[0].Reg()) 1153 p.To.Type = obj.TYPE_REG 1154 p.To.Reg = v.Reg() 1155 case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL, 1156 ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL, 1157 ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL: 1158 if v.Args[0].Reg() != v.Reg() { 1159 // POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel cpus. 1160 // TZCNT/LZCNT problem affects pre-Skylake models. See discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7. 1161 // Xor register with itself to break the dependency. 
1162 opregreg(s, x86.AXORL, v.Reg(), v.Reg()) 1163 } 1164 p := s.Prog(v.Op.Asm()) 1165 p.From.Type = obj.TYPE_REG 1166 p.From.Reg = v.Args[0].Reg() 1167 p.To.Type = obj.TYPE_REG 1168 p.To.Reg = v.Reg() 1169 1170 case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE, 1171 ssa.OpAMD64SETL, ssa.OpAMD64SETLE, 1172 ssa.OpAMD64SETG, ssa.OpAMD64SETGE, 1173 ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF, 1174 ssa.OpAMD64SETB, ssa.OpAMD64SETBE, 1175 ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN, 1176 ssa.OpAMD64SETA, ssa.OpAMD64SETAE, 1177 ssa.OpAMD64SETO: 1178 p := s.Prog(v.Op.Asm()) 1179 p.To.Type = obj.TYPE_REG 1180 p.To.Reg = v.Reg() 1181 1182 case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore, 1183 ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore, 1184 ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore, 1185 ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore, 1186 ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore: 1187 p := s.Prog(v.Op.Asm()) 1188 p.To.Type = obj.TYPE_MEM 1189 p.To.Reg = v.Args[0].Reg() 1190 ssagen.AddAux(&p.To, v) 1191 1192 case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1, 1193 ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1, 1194 ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1, 1195 ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1, 1196 ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1: 1197 p := s.Prog(v.Op.Asm()) 1198 memIdx(&p.To, v) 1199 ssagen.AddAux(&p.To, v) 1200 1201 case ssa.OpAMD64SETNEF: 1202 t := v.RegTmp() 1203 p := s.Prog(v.Op.Asm()) 1204 p.To.Type = obj.TYPE_REG 1205 p.To.Reg = v.Reg() 1206 q := s.Prog(x86.ASETPS) 1207 q.To.Type = obj.TYPE_REG 1208 q.To.Reg = t 1209 // ORL avoids partial register write and is smaller than ORQ, used by old compiler 1210 opregreg(s, x86.AORL, v.Reg(), t) 1211 1212 case ssa.OpAMD64SETEQF: 1213 t := v.RegTmp() 1214 p := s.Prog(v.Op.Asm()) 1215 p.To.Type = obj.TYPE_REG 1216 p.To.Reg = v.Reg() 1217 q := s.Prog(x86.ASETPC) 1218 q.To.Type = obj.TYPE_REG 1219 q.To.Reg = t 1220 // ANDL avoids partial register write and is smaller than ANDQ, used by 
old compiler 1221 opregreg(s, x86.AANDL, v.Reg(), t) 1222 1223 case ssa.OpAMD64InvertFlags: 1224 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) 1225 case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT: 1226 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) 1227 case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64: 1228 v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString()) 1229 case ssa.OpAMD64REPSTOSQ: 1230 s.Prog(x86.AREP) 1231 s.Prog(x86.ASTOSQ) 1232 case ssa.OpAMD64REPMOVSQ: 1233 s.Prog(x86.AREP) 1234 s.Prog(x86.AMOVSQ) 1235 case ssa.OpAMD64LoweredNilCheck: 1236 // Issue a load which will fault if the input is nil. 1237 // TODO: We currently use the 2-byte instruction TESTB AX, (reg). 1238 // Should we use the 3-byte TESTB $0, (reg) instead? It is larger 1239 // but it doesn't have false dependency on AX. 1240 // Or maybe allocate an output register and use MOVL (reg),reg2 ? 1241 // That trades clobbering flags for clobbering a register. 
1242 p := s.Prog(x86.ATESTB) 1243 p.From.Type = obj.TYPE_REG 1244 p.From.Reg = x86.REG_AX 1245 p.To.Type = obj.TYPE_MEM 1246 p.To.Reg = v.Args[0].Reg() 1247 if logopt.Enabled() { 1248 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name) 1249 } 1250 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers 1251 base.WarnfAt(v.Pos, "generated nil check") 1252 } 1253 case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload: 1254 p := s.Prog(v.Op.Asm()) 1255 p.From.Type = obj.TYPE_MEM 1256 p.From.Reg = v.Args[0].Reg() 1257 ssagen.AddAux(&p.From, v) 1258 p.To.Type = obj.TYPE_REG 1259 p.To.Reg = v.Reg0() 1260 case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ: 1261 p := s.Prog(v.Op.Asm()) 1262 p.From.Type = obj.TYPE_REG 1263 p.From.Reg = v.Reg0() 1264 p.To.Type = obj.TYPE_MEM 1265 p.To.Reg = v.Args[1].Reg() 1266 ssagen.AddAux(&p.To, v) 1267 case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock: 1268 s.Prog(x86.ALOCK) 1269 p := s.Prog(v.Op.Asm()) 1270 p.From.Type = obj.TYPE_REG 1271 p.From.Reg = v.Reg0() 1272 p.To.Type = obj.TYPE_MEM 1273 p.To.Reg = v.Args[1].Reg() 1274 ssagen.AddAux(&p.To, v) 1275 case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock: 1276 if v.Args[1].Reg() != x86.REG_AX { 1277 v.Fatalf("input[1] not in AX %s", v.LongString()) 1278 } 1279 s.Prog(x86.ALOCK) 1280 p := s.Prog(v.Op.Asm()) 1281 p.From.Type = obj.TYPE_REG 1282 p.From.Reg = v.Args[2].Reg() 1283 p.To.Type = obj.TYPE_MEM 1284 p.To.Reg = v.Args[0].Reg() 1285 ssagen.AddAux(&p.To, v) 1286 p = s.Prog(x86.ASETEQ) 1287 p.To.Type = obj.TYPE_REG 1288 p.To.Reg = v.Reg0() 1289 case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock: 1290 s.Prog(x86.ALOCK) 1291 p := s.Prog(v.Op.Asm()) 1292 p.From.Type = obj.TYPE_REG 1293 p.From.Reg = v.Args[1].Reg() 1294 p.To.Type = obj.TYPE_MEM 1295 p.To.Reg = v.Args[0].Reg() 1296 ssagen.AddAux(&p.To, v) 1297 case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA: 1298 p := 
s.Prog(v.Op.Asm()) // completes "p :=" begun on the previous line (PREFETCHT0/PREFETCHNTA)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
	case ssa.OpClobber:
		// Poison an 8-byte stack slot with the sentinel 0xdeaddead,
		// written as two 4-byte MOVL stores at SP+aux and SP+aux+4.
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p.To.Offset += 4
	case ssa.OpClobberReg:
		// Poison the output register with the 8-byte sentinel value.
		x := uint64(0xdeaddeaddeaddead)
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(x)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

// blockJump maps a conditional block kind to its branch instructions:
// asm is the jump taken to reach b.Succs[0], invasm the inverted jump
// used to reach b.Succs[1] (see the conditional cases in ssaGenBlock).
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

// eqfJumps and nefJumps are the two-instruction jump sequences used by
// s.CombJump for the floating-point EQF/NEF blocks (which also need the
// parity flag), indexed by which successor is the fallthrough block.
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

// ssaGenBlock emits the control-flow instructions that terminate block b.
// next is the block laid out immediately after b, so no jump is emitted
// when the taken successor equals next.
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		// Unconditional fallthrough or jump to the single successor.
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
		// Nothing to emit: Exit ends the program, RetJmp is handled elsewhere.
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.BlockAMD64EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			// Fallthrough is the taken-branch target: invert the condition.
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			// Neither successor follows: conditional jump to the likelier
			// successor plus an unconditional jump to the other.
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	case ssa.BlockAMD64JUMPTABLE:
		// JMP      *(TABLE)(INDEX*8)
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = b.Controls[1].Reg()
		p.To.Index = b.Controls[0].Reg()
		p.To.Scale = 8
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}

// loadRegResult emits a load of the value in n's frame slot (at offset off)
// into register reg, using the load opcode appropriate for type t, and
// returns the created Prog so the caller may adjust it further.
func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByType(t))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = reg
	return p
}

// spillArgReg appends, after p, a store of register reg into parameter n's
// slot (at offset off), using the store opcode for type t. The instruction
// is marked not-a-statement so it doesn't perturb debugger stepping.
// It returns the newly appended Prog.
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}