github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"github.com/go-asm/go/buildcfg"

	"github.com/go-asm/go/cmd/compile/base"
	"github.com/go-asm/go/cmd/compile/ir"
	"github.com/go-asm/go/cmd/compile/logopt"
	"github.com/go-asm/go/cmd/compile/objw"
	"github.com/go-asm/go/cmd/compile/ssa"
	"github.com/go-asm/go/cmd/compile/ssagen"
	"github.com/go-asm/go/cmd/compile/types"
	"github.com/go-asm/go/cmd/obj"
	"github.com/go-asm/go/cmd/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = ssa.AuxMark
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() {
		switch t.Size() {
		case 1:
			return x86.AMOVBLZX
		case 2:
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS
		}
	}
	panic(fmt.Sprintf("bad store type %v", t))
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//
//	dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// memIdx fills out a as an indexed memory reference for v.
// It assumes that the base register and the index register
// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
// The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
func memIdx(a *obj.Addr, v *ssa.Value) {
	r, i := v.Args[0].Reg(), v.Args[1].Reg()
	a.Type = obj.TYPE_MEM
	a.Scale = v.Op.Scale()
	if a.Scale == 1 && i == x86.REG_SP {
		r, i = i, r
	}
	a.Reg = r
	a.Index = i
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func getgFromTLS(s *ssagen.State, r int16) {
	// See the comments in github.com/go-asm/go/cmd/obj/x86/obj6.go
	// near CanUse1InsnTLS for a detailed explanation of these instructions.
	if x86.CanUse1InsnTLS(base.Ctxt) {
		// MOVQ (TLS), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	} else {
		// MOVQ TLS, r
		// MOVQ (r)(TLS*1), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		q := s.Prog(x86.AMOVQ)
		q.From.Type = obj.TYPE_MEM
		q.From.Reg = r
		q.From.Index = x86.REG_TLS
		q.From.Scale = 1
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r
	}
}

func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64VFMADD231SD:
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.AddRestSourceReg(v.Args[1].Reg())
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD,
		ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
		ssa.OpAMD64POR, ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

	case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
		p := s.Prog(v.Op.Asm())
		lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = bits
		p.To.Type = obj.TYPE_REG
		p.To.Reg = lo
		p.AddRestSourceReg(hi)

	case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
		ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
		ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
			p.To.Reg = v.Reg0()
		default:
			p.To.Reg = v.Reg()
		}

	case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.AddRestSourceReg(v.Args[1].Reg())

	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
		ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
		ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
		ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
		ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&m, v)
		p.AddRestSource(m)

	case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
		ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
		ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
		ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
		ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
		ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM}
		memIdx(&m, v)
		ssagen.AddAux(&m, v)
		p.AddRestSource(m)

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		var opCMP, opNEG, opSXD obj.As
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO
		case ssa.OpAMD64DIVL:
			opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ
		case ssa.OpAMD64DIVW:
			opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD
		}

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
		// Handle divide by -1 as a special case.
		var j1, j2 *obj.Prog
		if ssa.DivisionNeedsFixUp(v) {
			c := s.Prog(opCMP)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = r
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			// Divisor is not -1, proceed with normal division.
			j1 = s.Prog(x86.AJNE)
			j1.To.Type = obj.TYPE_BRANCH

			// Divisor is -1, manually compute quotient and remainder via fixup code.
			// n / -1 = -n
			n1 := s.Prog(opNEG)
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX

			// n % -1 == 0
			opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, no point in zeroing the remainder.

			// Skip over normal division.
			j2 = s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH
		}

		// Sign extend dividend and perform division.
		p := s.Prog(opSXD)
		if j1 != nil {
			j1.To.SetTarget(p)
		}
		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		if j2 != nil {
			j2.To.SetTarget(s.Pc())
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in Dx, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
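		// For example, with x = y = 1<<63 the ADDQ leaves 0 in the register with CF set,
		// and the RCRQ $1 below pulls CF back into bit 63, yielding the exact average 1<<63.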
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
		r := v.Reg0()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		switch r {
		case r0:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			v.Fatalf("output not in same register as an input %s", v.LongString())
		}

	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			switch v.AuxInt {
			case 1:
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and make a binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case -1:
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case 0x80:
				// 'SUBQ $-0x80, r' is shorter to encode than
				// and functionally equivalent to 'ADDQ $0x80, r'.
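				// ($0x80 does not fit in the sign-extended 8-bit immediate form, but $-0x80 does.)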
				asm := x86.ASUBL
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ASUBQ
				}
				p := s.Prog(asm)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = -0x80
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return

			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		// Flag condition: ^ZERO || PARITY
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
		} else {
			q = s.Prog(x86.ACMOVWPS)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
		q.To.Reg = v.Reg()

	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,TMP
		//   CMOV*NE  DST,TMP
		//   CMOV*PC  TMP,DST
		//
		// TODO(rasky): we could generate:
		//   CMOV*NE  DST,SRC
		//   CMOV*PC  SRC,DST
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		t := v.RegTmp()
		opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = t
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
		} else {
			q = s.Prog(x86.ACMOVWPC)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = t
		q.To.Type = obj.TYPE_REG
		q.To.Reg = v.Reg()

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.OpAMD64ANDQconst:
		asm := v.Op.Asm()
		// If the constant is positive and fits into 32 bits, use ANDL.
		// This saves a few bytes of encoding.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			asm = x86.AANDL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		o := v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = o
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
			switch v.Op {
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			}
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = o
			p.To.Type = obj.TYPE_REG
			p.To.Reg = o
		}
		ssagen.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
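		// That is why Args[0] and Args[1] are passed in the opposite order from the CMP/TEST case above.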
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRQconst:
		op := v.Op
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version because it's shorter
			op = ssa.OpAMD64BTLconst
		}
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[2].Reg()
	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			opregreg(s, x86.AXORL, x, x)
			break
		}

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32bit are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
		ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
		ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
		ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		memIdx(&p.To, v)
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val()
		if val == 1 || val == -1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmodify {
				if val == 1 {
					asm = x86.AINCQ
				} else {
					asm = x86.ADECQ
				}
			} else {
				if val == 1 {
					asm = x86.AINCL
				} else {
					asm = x86.ADECL
				}
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
		ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val64()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)

	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVOstoreconst:
		sc := v.AuxValAndOff()
		if sc.Val() != 0 {
			v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString())
		}

		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_X15
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())

	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		switch {
		case p.As == x86.AADDQ && p.From.Offset == 1:
			p.As = x86.AINCQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDQ && p.From.Offset == -1:
			p.As = x86.ADECQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == 1:
			p.As = x86.AINCL
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == -1:
			p.As = x86.ADECL
			p.From.Type = obj.TYPE_NONE
		}
		memIdx(&p.To, v)
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		var p *obj.Prog
		switch v.Op {
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		}
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
		p := s.Prog(v.Op.Asm())

		r, i := v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_MEM
		p.From.Scale = v.Op.Scale()
		if p.From.Scale == 1 && i == x86.REG_SP {
			r, i = i, r
		}
		p.From.Reg = r
		p.From.Index = i

		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64DUFFZERO:
		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffcopy
		if v.AuxInt%16 != 0 {
			v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
		}
		p.To.Offset = 14 * (64 - v.AuxInt/16)
		// 14 and 64 are magic constants.
		// 14 is the number of bytes to encode:
		//	MOVUPS	(SI), X0
		//	ADDQ	$16, SI
		//	MOVUPS	X0, (DI)
		//	ADDQ	$16, DI
		// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.

	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredHasCPUFeature:
		p := s.Prog(x86.AMOVBLZX)
		p.From.Type = obj.TYPE_MEM
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
		// The loop only runs once.
		for _, ap := range v.Block.Func.RegArgs {
			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
			s.FuncInfo().AddSpill(
				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)})
		}
		v.Block.Func.RegArgs = nil
		ssagen.CheckArgReg(v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		if s.ABI == obj.ABIInternal {
			v.Fatalf("LoweredGetG should not appear in ABIInternal")
		}
		r := v.Reg()
		getgFromTLS(s, r)
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
		if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
			// zeroing X15 when entering ABIInternal from ABI0
			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
			}
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		}
		if v.Op == ssa.OpAMD64CALLtail {
			s.TailCall(v)
			break
		}
		s.Call(v)
		if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
			// zeroing X15 when entering ABIInternal from ABI0
			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
			}
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		}
	case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)

	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if types.PtrSize == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]

	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64NEGLflags:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
			p.To.Reg = v.Reg0()
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
			p.To.Reg = v.Reg()
		}
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val < 0 || val > 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.AddRestSourceReg(v.Args[0].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
		ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
		ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel cpus.
			// TZCNT/LZCNT problem affects pre-Skylake models. See discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7.
			// Xor register with itself to break the dependency.
			opregreg(s, x86.AXORL, v.Reg(), v.Reg())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		ssa.OpAMD64SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1,
		ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1,
		ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1,
		ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1,
		ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.To, v)
		ssagen.AddAux(&p.To, v)

	case ssa.OpAMD64SETNEF:
		t := v.RegTmp()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = t
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), t)

	case ssa.OpAMD64SETEQF:
		t := v.RegTmp()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = t
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), t)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p.To.Offset += 4
	case ssa.OpClobberReg:
		x := uint64(0xdeaddeaddeaddead)
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(x)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.BlockAMD64EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	case ssa.BlockAMD64JUMPTABLE:
		// JMP *(TABLE)(INDEX*8)
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = b.Controls[1].Reg()
		p.To.Index = b.Controls[0].Reg()
		p.To.Scale = 8
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}

func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByType(t))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = reg
	return p
}

func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}