github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"github.com/bir3/gocompiler/src/internal/buildcfg"
	"math"

	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ir"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/logopt"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/objw"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssa"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssagen"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
	"github.com/bir3/gocompiler/src/cmd/internal/obj"
	"github.com/bir3/gocompiler/src/cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() {
		switch t.Size() {
		case 1:
			return x86.AMOVBLZX
		case 2:
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS
		}
	}
	panic(fmt.Sprintf("bad store type %v", t))
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//
//	dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
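// For example, opregreg(s, x86.AADDQ, x86.REG_AX, x86.REG_BX) emits
// ADDQ BX, AX, i.e. AX += BX.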
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// memIdx fills out a as an indexed memory reference for v.
// It assumes that the base register and the index register
// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
// The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
func memIdx(a *obj.Addr, v *ssa.Value) {
	r, i := v.Args[0].Reg(), v.Args[1].Reg()
	a.Type = obj.TYPE_MEM
	a.Scale = v.Op.Scale()
	if a.Scale == 1 && i == x86.REG_SP {
		r, i = i, r
	}
	a.Reg = r
	a.Index = i
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ.
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func getgFromTLS(s *ssagen.State, r int16) {
	// See the comments in cmd/internal/obj/x86/obj6.go
	// near CanUse1InsnTLS for a detailed explanation of these instructions.
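	// Either way, r ends up holding the current goroutine's g pointer.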
	if x86.CanUse1InsnTLS(base.Ctxt) {
		// MOVQ (TLS), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	} else {
		// MOVQ TLS, r
		// MOVQ (r)(TLS*1), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		q := s.Prog(x86.AMOVQ)
		q.From.Type = obj.TYPE_MEM
		q.From.Reg = r
		q.From.Index = x86.REG_TLS
		q.From.Scale = 1
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r
	}
}

// ssaGenValue emits machine instructions for a single SSA value.
func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64VFMADD231SD:
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.SetFrom3Reg(v.Args[1].Reg())
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

	case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
		p := s.Prog(v.Op.Asm())
		lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = bits
		p.To.Type = obj.TYPE_REG
		p.To.Reg = lo
		p.SetFrom3Reg(hi)

	case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
		ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
		ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.SetFrom3Reg(v.Args[1].Reg())

	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
		ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
		ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		p.SetFrom3Reg(v.Args[0].Reg())

	case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
		ssa.OpAMD64SHRXLload,
		ssa.OpAMD64SHRXQload,
		ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&m, v)
		p.SetFrom3(m)

	case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
		ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
		ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
		ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
		ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
		ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM}
		memIdx(&m, v)
		ssagen.AddAux(&m, v)
		p.SetFrom3(m)

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()
		var j1 *obj.Prog

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		if ssa.DivisionNeedsFixUp(v) {
			var c *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				c = s.Prog(x86.ACMPQ)
			case ssa.OpAMD64DIVL:
				c = s.Prog(x86.ACMPL)
			case ssa.OpAMD64DIVW:
				c = s.Prog(x86.ACMPW)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = r
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1
			j1 = s.Prog(x86.AJEQ)
			j1.To.Type = obj.TYPE_BRANCH
		}

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		if j1 != nil {
			// Skip over -1 fixup code.
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			// Issue -1 fixup code.
			// n / -1 = -n
			var n1 *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				n1 = s.Prog(x86.ANEGQ)
			case ssa.OpAMD64DIVL:
				n1 = s.Prog(x86.ANEGL)
			case ssa.OpAMD64DIVW:
				n1 = s.Prog(x86.ANEGW)
			}
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX

			// n % -1 == 0
			opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, no point in zeroing the remainder.
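			// j1 (taken when the divisor is -1) targets the NEG fixup; j2 jumps
			// past the fixup to the first instruction emitted after this value.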

			j1.To.SetTarget(n1)
			j2.To.SetTarget(s.Pc())
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
		r := v.Reg0()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		switch r {
		case r0:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			v.Fatalf("output not in same register as an input %s", v.LongString())
		}

	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			switch v.AuxInt {
			case 1:
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes a binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case -1:
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case 0x80:
				// 'SUBQ $-0x80, r' is shorter to encode than
				// and functionally equivalent to 'ADDQ $0x80, r'.
				asm := x86.ASUBL
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ASUBQ
				}
				p := s.Prog(asm)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = -0x80
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return

			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		// Flag condition: ^ZERO || PARITY
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
		} else {
			q = s.Prog(x86.ACMOVWPS)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
		q.To.Reg = v.Reg()

	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,TMP
		//   CMOV*NE  DST,TMP
		//   CMOV*PC  TMP,DST
		//
		// TODO(rasky): we could generate:
		//   CMOV*NE  DST,SRC
		//   CMOV*PC  SRC,DST
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		t := v.RegTmp()
		opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = t
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
		} else {
			q = s.Prog(x86.ACMOVWPC)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = t
		q.To.Type = obj.TYPE_REG
		q.To.Reg = v.Reg()

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.SetFrom3Reg(v.Args[0].Reg())

	case ssa.OpAMD64ANDQconst:
		asm := v.Op.Asm()
		// If the constant is positive and fits into 32 bits, use ANDL.
		// This saves a few bytes of encoding.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			asm = x86.AANDL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		o := v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = o
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
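			// For example: LEAQ (R1)(R2*4), R3 followed by LEAQ off(R3), R3,
			// rather than a single LEAQ off(R1)(R2*4), R3.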
			switch v.Op {
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			}
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = o
			p.To.Type = obj.TYPE_REG
			p.To.Reg = o
		}
		ssagen.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
		op := v.Op
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version because it's shorter
			op = ssa.OpAMD64BTLconst
		}
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[2].Reg()
	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			opregreg(s, x86.AXORL, x, x)
			break
		}

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
		ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
		ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
		ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1,
		ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		memIdx(&p.To, v)
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val()
		if val == 1 || val == -1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmodify {
				if val == 1 {
					asm = x86.AINCQ
				} else {
					asm = x86.ADECQ
				}
			} else {
				if val == 1 {
					asm = x86.AINCL
				} else {
					asm = x86.ADECL
				}
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val64()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)

	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVOstoreconst:
		sc := v.AuxValAndOff()
		if sc.Val() != 0 {
			v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString())
		}

		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_X15
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())

	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		switch {
		case p.As == x86.AADDQ && p.From.Offset == 1:
			p.As = x86.AINCQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDQ && p.From.Offset == -1:
			p.As = x86.ADECQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == 1:
			p.As = x86.AINCL
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == -1:
			p.As = x86.ADECL
			p.From.Type = obj.TYPE_NONE
		}
		memIdx(&p.To, v)
		ssagen.AddAux2(&p.To, v, sc.Off64())

	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		var p *obj.Prog
		switch v.Op {
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		}
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
		p := s.Prog(v.Op.Asm())

		r, i := v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_MEM
		p.From.Scale = v.Op.Scale()
		if p.From.Scale == 1 && i == x86.REG_SP {
			r, i = i, r
		}
		p.From.Reg = r
		p.From.Index = i

		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64DUFFZERO:
		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffcopy
		if v.AuxInt%16 != 0 {
			v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
		}
		p.To.Offset = 14 * (64 - v.AuxInt/16)
		// 14 and 64 are magic constants. 14 is the number of bytes to encode:
		//	MOVUPS	(SI), X0
		//	ADDQ	$16, SI
		//	MOVUPS	X0, (DI)
		//	ADDQ	$16, DI
		// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.

	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredHasCPUFeature:
		p := s.Prog(x86.AMOVBQZX)
		p.From.Type = obj.TYPE_MEM
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
		// The loop only runs once.
		for _, ap := range v.Block.Func.RegArgs {
			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
			s.FuncInfo().AddSpill(
				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)})
		}
		v.Block.Func.RegArgs = nil
		ssagen.CheckArgReg(v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		if s.ABI == obj.ABIInternal {
			v.Fatalf("LoweredGetG should not appear in ABIInternal")
		}
		r := v.Reg()
		getgFromTLS(s, r)
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
		if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
			// zeroing X15 when entering ABIInternal from ABI0
			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
			}
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		}
		if v.Op == ssa.OpAMD64CALLtail {
			s.TailCall(v)
			break
		}
		s.Call(v)
		if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
			// zeroing X15 when entering ABIInternal from ABI0
			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
			}
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		}
	case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)

	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if types.PtrSize == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// arg0 is in DI. Set sym to match where regalloc put arg1.
		p.To.Sym = ssagen.GCWriteBarrierReg[v.Args[1].Reg()]

	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64NEGLflags:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
			p.To.Reg = v.Reg0()
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
			p.To.Reg = v.Reg()
		}
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val < 0 || val > 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.SetFrom3Reg(v.Args[0].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
		ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
		ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel cpus.
			// TZCNT/LZCNT problem affects pre-Skylake models. See discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7.
			// Xor register with itself to break the dependency.
			opregreg(s, x86.AXORL, v.Reg(), v.Reg())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		ssa.OpAMD64SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpAMD64SETNEF:
		t := v.RegTmp()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = t
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), t)

	case ssa.OpAMD64SETEQF:
		t := v.RegTmp()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = t
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), t)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
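		// The flags set by the TESTB are unused; the instruction exists only
		// to fault when v.Args[0] is nil.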
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p.To.Offset += 4
	case ssa.OpClobberReg:
		x := uint64(0xdeaddeaddeaddead)
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(x)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

// blockJump gives, for each conditional block kind, the jump instruction for
// the condition (asm) and for its inverse (invasm).
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

// eqfJumps and nefJumps are the conditional-jump pairs that CombJump emits
// for the floating-point EQF and NEF block kinds (see ssaGenBlock).
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

// ssaGenBlock emits the control-flow instructions that end block b;
// next is the block that will be laid out immediately after it.
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.BlockAMD64EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	case ssa.BlockAMD64JUMPTABLE:
		// JMP *(TABLE)(INDEX*8)
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = b.Controls[1].Reg()
		p.To.Index = b.Controls[0].Reg()
		p.To.Scale = 8
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}

// loadRegResult loads the result value n of type t (at offset off within its
// frame slot) into register reg.
func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByType(t))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = reg
	return p
}

// spillArgReg appends, after p, a store that spills argument register reg of
// type t to parameter n's frame slot at offset off.
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}