github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/cmd/compile/internal/amd64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with a 1 byte opcode,
		// so use movups, which has a 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
// see runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
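// Explanatory note (not from the original source): the returned offset picks
// the entry point inside the duffzero body; entering dzBlockSize*(dzBlocks-blocks)
// bytes in leaves exactly `blocks` full blocks to execute. When size is not a
// whole number of blocks, the entry point is moved back past the preceding
// block's LEAQ and `steps` of its MOVUPS instructions, and the (negative)
// pointer adjust pre-biases DI so those extra stores land at the start of the
// target buffer. The dz* constants are defined alongside the duffzero
// generator elsewhere in this package.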
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := s.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var c *obj.Prog
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			c = s.Prog(x86.ACMPQ)
		case ssa.OpAMD64DIVL:
			c = s.Prog(x86.ACMPL)
		case ssa.OpAMD64DIVW:
			c = s.Prog(x86.ACMPW)
		}
		c.From.Type = obj.TYPE_REG
		c.From.Reg = r
		c.To.Type = obj.TYPE_CONST
		c.To.Offset = -1
		j1 := s.Prog(x86.AJEQ)
		j1.To.Type = obj.TYPE_BRANCH

		// Sign extend dividend.
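		// (CQO sign-extends AX into DX:AX for the 64-bit divide; CDQ and CWD
		// do the same for the 32- and 16-bit widths, so DX holds the upper
		// half of the dividend that IDIV expects.)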
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		// Skip over -1 fixup code.
		j2 := s.Prog(obj.AJMP)
		j2.To.Type = obj.TYPE_BRANCH

		// Issue -1 fixup code.
		// n / -1 = -n
		n1 := s.Prog(x86.ANEGQ)
		n1.To.Type = obj.TYPE_REG
		n1.To.Reg = x86.REG_AX

		// n % -1 == 0
		n2 := s.Prog(x86.AXORL)
		n2.From.Type = obj.TYPE_REG
		n2.From.Reg = x86.REG_DX
		n2.To.Type = obj.TYPE_REG
		n2.To.Reg = x86.REG_DX

		// TODO(khr): issue only the -1 fixup code we need.
		// For instance, if only the quotient is used, there is no point in zeroing the remainder.

		j1.To.Val = n1
		j2.To.Val = s.Pc()

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// The frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant; SSA rewrites generate the 64 bit versions.

		// Arg[0] is already in AX as it's the only register we allow,
		// and DX is the only output we care about (the high bits).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency.
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow;
		// results: hi in DX, lo in AX.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0] and Arg[1] are already in DX and AX, as they're the only registers we allow;
		// results: quotient in AX, remainder in DX.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// Compute (x+y)/2 unsigned.
		// Do a 64-bit add; the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// The software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
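				// (Background note: INC/DEC leave CF untouched while updating
				// the other arithmetic flags, which on some microarchitectures
				// costs a partial-flags merge; ADD/SUB rewrite all flags, which
				// is presumably the flags dependence mentioned above.)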
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP;
		// we must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()
		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
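	// (Note added for readability: the store cases below mirror the loads.
	// The value to store comes from the last register argument — Args[1], or
	// Args[2] for the indexed forms — the base address from Args[0], and any
	// constant offset or symbol from the Aux/AuxInt fields via gc.AddAux.)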
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmem, ssa.OpAMD64ADDLconstmem:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		if val == 1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmem {
				asm = x86.AINCQ
			} else {
				asm = x86.AINCL
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
		} else {
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = val
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
		}
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX,
		ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(s, x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert:
		if v.Args[0].Reg() != v.Reg() {
			v.Fatalf("MOVXconvert should be a no-op")
		}
	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
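		// (In short: when a single-instruction TLS access is available, the
		// TLS pseudo-register can be dereferenced directly to reach g;
		// otherwise the TLS base is materialized into r first and g is
		// loaded through it, as emitted below.)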
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT on Intel has a false dependency on the destination register.
			// Zero the destination to break the dependency.
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids a partial register write and is smaller than ORQ, which was used by the old compiler.
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids a partial register write and is smaller than ANDQ, which was used by the old compiler.
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p.To.Offset += 4
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}
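
// (Note added for readability: eqfJumps and nefJumps encode the two-branch
// sequences needed for floating-point equality. UCOMISx reports "unordered"
// (NaN) through the parity flag, so == must also check that PF is clear and
// != that PF is set. Each row is selected by which successor is the
// fallthrough block, as marked in the comments below.)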
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockAMD64EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}