// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
// dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
// see runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
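// A worked example (sketch; the dz* constants are defined elsewhere in this
// package, but per the comment above each block is 4 MOVUPS of 16 bytes plus
// a trailing LEAQ, i.e. dzClearStep = 16 and dzBlockLen = 4):
// size = 80 gives steps = 5 and blocks = 1, and steps %= dzBlockLen leaves
// one extra clear. The entry offset therefore starts at the final full block
// and backs up by one LEAQ and one MOV, while adj = -16*(4-1) = -48
// pre-adjusts DI so that the single leftover MOVUPS (which writes at offset
// 48) lands at the start of the buffer; its LEAQ then advances DI past those
// 16 bytes and the remaining full block clears the other 64.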
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := s.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var c *obj.Prog
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			c = s.Prog(x86.ACMPQ)
		case ssa.OpAMD64DIVL:
			c = s.Prog(x86.ACMPL)
		case ssa.OpAMD64DIVW:
			c = s.Prog(x86.ACMPW)
		}
		c.From.Type = obj.TYPE_REG
		c.From.Reg = r
		c.To.Type = obj.TYPE_CONST
		c.To.Offset = -1
		j1 := s.Prog(x86.AJEQ)
		j1.To.Type = obj.TYPE_BRANCH

		// Sign extend dividend.
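		// CQO sign-extends AX into DX:AX (CDQ and CWD are the 32- and 16-bit
		// forms), producing the double-width dividend that the signed divide
		// below expects.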
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		// Skip over -1 fixup code.
		j2 := s.Prog(obj.AJMP)
		j2.To.Type = obj.TYPE_BRANCH

		// Issue -1 fixup code.
		// n / -1 = -n
		n1 := s.Prog(x86.ANEGQ)
		n1.To.Type = obj.TYPE_REG
		n1.To.Reg = x86.REG_AX

		// n % -1 == 0
		n2 := s.Prog(x86.AXORL)
		n2.From.Type = obj.TYPE_REG
		n2.From.Reg = x86.REG_DX
		n2.To.Type = obj.TYPE_REG
		n2.To.Reg = x86.REG_DX

		// TODO(khr): issue only the -1 fixup code we need.
		// For instance, if only the quotient is used, no point in zeroing the remainder.

		j1.To.Val = n1
		j2.To.Val = s.Pc()

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// The frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant; SSA rewrites generate the 64 bit versions.

		// Arg[0] is already in AX as it's the only register we allow,
		// and DX is the only output we care about (the high bits).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency.
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow;
		// results: hi in DX, lo in AX.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0] and Arg[1] are already in DX and AX, as they're the only registers we allow;
		// results: quotient in AX, remainder in DX.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// Compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// The software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmem, ssa.OpAMD64ADDLconstmem:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		if val == 1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmem {
				asm = x86.AINCQ
			} else {
				asm = x86.AINCL
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
		} else {
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = val
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
		}
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(s, x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert:
		if v.Args[0].Reg() != v.Reg() {
			v.Fatalf("MOVXconvert should be a no-op")
		}
	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)

	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if gc.Widthptr == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val != 0 && val != 1 && val != 2 && val != 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT on Intel has a false dependency on the destination register.
			// Zero the destination to break the dependency.
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQmem, ssa.OpAMD64SETNEmem,
		ssa.OpAMD64SETLmem, ssa.OpAMD64SETLEmem,
		ssa.OpAMD64SETGmem, ssa.OpAMD64SETGEmem,
		ssa.OpAMD64SETBmem, ssa.OpAMD64SETBEmem,
		ssa.OpAMD64SETAmem, ssa.OpAMD64SETAEmem:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p.To.Offset += 4
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}
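// ssaGenBlock emits the control-flow instructions for block b. next is the
// block that will be laid out immediately after b, so a branch whose target
// is next can fall through (or have its condition inverted) below.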
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockAMD64EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}