// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t ssa.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t ssa.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
// dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(op obj.As, dest, src int16) *obj.Prog {
	p := gc.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetLineno(v.Line)

	if gc.Thearch.Use387 {
		if ssaGenValue387(s, v) {
			return // v was handled by 387 generation.
		}
	}

	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := gc.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when most
		// negative int is divided by -1.
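		// Rough sketch of the sequence built below for the signed cases
		// (illustrative only; the Progs are assembled step by step further down):
		//	CMPL  x, $-1
		//	JEQ   fixup
		//	CDQ            // sign-extend AX into DX (CWD for the 16-bit ops)
		//	IDIVL x
		//	JMP   done
		// fixup:
		//	NEGL  AX       // quotient of n / -1 is -n
		//	               // (for MOD: XORL DX, DX, since n % -1 == 0)
		// done: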
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			var c *obj.Prog
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				c = gc.Prog(x86.ACMPL)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACDQ) //TODO: fix

			case ssa.Op386DIVW, ssa.Op386MODW:
				c = gc.Prog(x86.ACMPW)
				j = gc.Prog(x86.AJEQ)
				gc.Prog(x86.ACWD)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			j.To.Type = obj.TYPE_BRANCH
		}

		// for unsigned ints, we sign extend by setting DX = 0
		// signed ints were sign extended above
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := gc.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := gc.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = gc.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = gc.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}

	case ssa.Op386HMULL, ssa.Op386HMULW, ssa.Op386HMULB,
		ssa.Op386HMULLU, ssa.Op386HMULWU, ssa.Op386HMULBU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
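		// MULL takes a single explicit operand; the other factor is
		// implicitly AX and the full 64-bit product is written to DX:AX,
		// which is why only args[1] needs to be materialized here.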
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := gc.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := gc.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := gc.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := gc.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := gc.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
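		// Note the argument order passed to opregreg: dest gets Args[0]
		// and src gets Args[1], the reverse of the CMP/TEST case above.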
		opregreg(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386MOVLconst:
		x := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		var literal string
		if v.Op == ssa.Op386MOVSDconst1 {
			literal = fmt.Sprintf("$f64.%016x", uint64(v.AuxInt))
		} else {
			literal = fmt.Sprintf("$f32.%08x", math.Float32bits(float32(math.Float64frombits(uint64(v.AuxInt)))))
		}
		p := gc.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		p.From.Sym = obj.Linklookup(gc.Ctxt, literal, 0)
		p.From.Sym.Set(obj.AttrLocal, true)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.Op386MOVLconvert: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpPhi:
		gc.CheckLoweredPhi(v)
	case ssa.OpInitMem:
		// memory arg needs no code
	case ssa.OpArg:
		// input args need no code
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := gc.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := gc.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.Op386CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpSP, ssa.OpSB, ssa.OpSelect0, ssa.OpSelect1:
		// nothing to do
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		gc.Prog(x86.AREP)
		gc.Prog(x86.AMOVSL)
	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))
	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))
	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))
	case ssa.OpKeepAlive:
		gc.KeepAlive(v)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
			gc.Warnl(v.Line, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	s.SetLineno(b.Line)

	if gc.Thearch.Use387 {
		// Empty the 387's FP stack before the block ends.
		flush387(s)
	}

	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := gc.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = gc.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := gc.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		gc.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := gc.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))

	case ssa.Block386EQF:
		gc.SSAGenFPJump(s, b, next, &eqfJumps)

	case ssa.Block386NEF:
		gc.SSAGenFPJump(s, b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		likely := b.Likely
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = gc.Prog(jmp.invasm)
			likely *= -1
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = gc.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := gc.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

		// liblink reorders the instruction stream as it sees fit.
		// Pass along what we know so liblink can make use of it.
		// TODO: Once we've fully switched to SSA,
		// make liblink leave our output alone.
		switch likely {
		case ssa.BranchUnlikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
		case ssa.BranchLikely:
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 1
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}