github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}
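
// For example, a MOVL $0, R emitted while the flags from an earlier CMPL are
// still live must not be rewritten into XORL R, R, which would clobber those
// flags; the mark set above makes ssaGenValue add PRESERVEFLAGS to such moves.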

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
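		// A rough sketch of the sequence emitted below for a signed DIVL/MODL
		// (the divisor register x is whatever Args[1] was allocated to; CX is
		// just an example):
		//	CMPL	CX, $-1
		//	JEQ	fixup
		//	CDQ			// sign-extend AX into DX
		//	IDIVL	CX
		//	JMP	done
		// fixup:
		//	NEGL	AX		// for DIVL: x / -1 == -x (MODL zeroes DX instead: x % -1 == 0)
		// done: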
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = s.Prog(x86.ACMPL)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = s.Prog(x86.ACMPW)
				j = s.Prog(x86.AJEQ)
				s.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// for unsigned ints, we zero extend by setting DX = 0
		// signed ints were sign extended above
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
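		// Sketch of the two instructions emitted (the output register r is
		// required to be the same as Args[0]):
		//	ADDL	Args[1], r	// bit 32 of the sum lands in the carry flag
		//	RCRL	$1, r		// rotate right through carry: carry comes back as bit 31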
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
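	// There is no SSE immediate form, so MOVSSconst1/MOVSDconst1 materialize
	// the address of a constant-pool symbol with LEAL, and
	// MOVSSconst2/MOVSDconst2 then load the value through that address.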
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSDloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
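	// SP cannot be encoded as an index register, so the scale-1 indexed
	// loads below (and the scale-1 indexed stores further down) swap base
	// and index when the index register happens to be SP.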
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
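	// DUFFZERO/DUFFCOPY jump into the middle of runtime·duffzero/duffcopy;
	// AuxInt is the entry-point offset, which determines how many bytes get
	// zeroed or copied.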
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.Op386MOVLconvert: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

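	// SETNEF/SETEQF must also consult the parity flag so that comparisons
	// involving NaN come out right. Sketch of the SETEQF lowering below
	// (AX is used as scratch; r is the result register):
	//	SETEQ	r
	//	SETPC	AX	// parity clear: the compare was ordered (no NaN)
	//	ANDL	AX, r	// equal only counts if the compare was also ordered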
	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

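// EQF/NEF blocks need a pair of conditional jumps because floating-point
// equality is "equal and ordered" (ZF set and PF clear); the extra JPS/JPC
// handles the NaN (parity) case. The outer index selects the row based on
// which successor is the fallthrough block.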
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}