github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"github.com/go-asm/go/cmd/compile/base"
	"github.com/go-asm/go/cmd/compile/ir"
	"github.com/go-asm/go/cmd/compile/logopt"
	"github.com/go-asm/go/cmd/compile/ssa"
	"github.com/go-asm/go/cmd/compile/ssagen"
	"github.com/go-asm/go/cmd/compile/types"
	"github.com/go-asm/go/cmd/obj"
	"github.com/go-asm/go/cmd/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = ssa.AuxMark
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() {
		switch t.Size() {
		case 1:
			return x86.AMOVBLZX
		case 2:
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//
//	dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// ssaGenValue emits the machine instructions for a single SSA value.
func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ROLL, ssa.Op386ROLW, ssa.Op386ROLB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			if ssa.DivisionNeedsFixUp(v) {
				var c *obj.Prog
				switch v.Op {
				case ssa.Op386DIVL, ssa.Op386MODL:
					c = s.Prog(x86.ACMPL)
					j = s.Prog(x86.AJEQ)

				case ssa.Op386DIVW, ssa.Op386MODW:
					c = s.Prog(x86.ACMPW)
					j = s.Prog(x86.AJEQ)
				}
				c.From.Type = obj.TYPE_REG
				c.From.Reg = x
				c.To.Type = obj.TYPE_CONST
				c.To.Offset = -1

				j.To.Type = obj.TYPE_BRANCH
			}
			// sign extend the dividend
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				s.Prog(x86.ACDQ)
			case ssa.Op386DIVW, ssa.Op386MODW:
				s.Prog(x86.ACWD)
			}
		}

		// for unsigned ints, we sign extend by setting DX = 0
		// signed ints were sign extended above
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.SetTarget(n)
			j2.To.SetTarget(s.Pc())
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = base.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = base.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1,
		ssa.Op386MOVSDloadidx8, ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4, ssa.Op386MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.Op386MOVSDloadidx8:
			p.From.Scale = 8
		case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.Op386MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
		ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		p.From.Index = v.Args[2].Reg()
		p.From.Scale = 4
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
		ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
		ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
		ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
		ssa.Op386ADDLmodify, ssa.Op386SUBLmodify, ssa.Op386ANDLmodify, ssa.Op386ORLmodify, ssa.Op386XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.Op386ADDLconstmodify:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off64()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386ANDLconstmodify, ssa.Op386ORLconstmodify, ssa.Op386XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val64()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
		ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
		ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.Op386MOVSDstoreidx8:
			p.To.Scale = 8
		case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
			ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
			p.To.Scale = 4
		case ssa.Op386MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		ssagen.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.Op386ADDLconstmodifyidx4:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off64()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Scale = 4
			p.To.Index = v.Args[1].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
		ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4,
			ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in github.com/go-asm/go/cmd/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(base.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]

	case ssa.Op386LoweredPanicBoundsA, ssa.Op386LoweredPanicBoundsB, ssa.Op386LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(8) // space used in callee args area by assembly stubs

	case ssa.Op386LoweredPanicExtendA, ssa.Op386LoweredPanicExtendB, ssa.Op386LoweredPanicExtendC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.ExtendCheckFunc[v.AuxInt]
		s.UseArgs(12) // space used in callee args area by assembly stubs

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386CALLtail:
		s.TailCall(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSS, ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE,
		ssa.Op386SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT,
		ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
	case ssa.Op386LoweredCtz32:
		// BSFL in, out
		p := s.Prog(x86.ABSFL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

		// JNZ 2(PC)
		p1 := s.Prog(x86.AJNE)
		p1.To.Type = obj.TYPE_BRANCH

		// MOVL $32, out
		p2 := s.Prog(x86.AMOVL)
		p2.From.Type = obj.TYPE_CONST
		p2.From.Offset = 32
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = v.Reg()

		// NOP (so the JNZ has somewhere to land)
		nop := s.Prog(obj.ANOP)
		p1.To.SetTarget(nop)

	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
	case ssa.OpClobberReg:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

// blockJump gives the branch instruction and its inverse for each conditional block kind.
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386OS:  {x86.AJOS, x86.AJOC},
	ssa.Block386OC:  {x86.AJOC, x86.AJOS},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

// ssaGenBlock emits the control-flow instructions at the end of block b;
// next is the block that will be laid out immediately after b.
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.Block386EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386OS, ssa.Block386OC,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}