// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ppc64

import (
	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/ppc64"
	"math"
	"strings"
)

// iselOp encodes mapping of comparison operations onto ISEL operands
type iselOp struct {
	cond        int64
	valueIfCond int // if cond is true, the value to return (0 or 1)
}

// Input registers to ISEL used for comparison. Index 0 is zero, 1 is (will be) 1
var iselRegs = [2]int16{ppc64.REG_R0, ppc64.REGTMP}

var iselOps = map[ssa.Op]iselOp{
	ssa.OpPPC64Equal:         iselOp{cond: ppc64.C_COND_EQ, valueIfCond: 1},
	ssa.OpPPC64NotEqual:      iselOp{cond: ppc64.C_COND_EQ, valueIfCond: 0},
	ssa.OpPPC64LessThan:      iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1},
	ssa.OpPPC64GreaterEqual:  iselOp{cond: ppc64.C_COND_LT, valueIfCond: 0},
	ssa.OpPPC64GreaterThan:   iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1},
	ssa.OpPPC64LessEqual:     iselOp{cond: ppc64.C_COND_GT, valueIfCond: 0},
	ssa.OpPPC64FLessThan:     iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1},
	ssa.OpPPC64FGreaterThan:  iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1},
	ssa.OpPPC64FLessEqual:    iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
	ssa.OpPPC64FGreaterEqual: iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
}

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			if t.IsSigned() {
				return ppc64.AMOVB
			} else {
				return ppc64.AMOVBZ
			}
		case 2:
			if t.IsSigned() {
				return ppc64.AMOVH
			} else {
				return ppc64.AMOVHZ
			}
		case 4:
			if t.IsSigned() {
				return ppc64.AMOVW
			} else {
				return ppc64.AMOVWZ
			}
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad load type")
}

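// Note the asymmetry with storeByType below: reloads must pick the signed or
// unsigned variant (for example MOVH vs MOVHZ) so the value is re-extended
// correctly to 64 bits, while a store only writes the low bytes of the source
// register, so a single opcode per size is enough.
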
// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			return ppc64.AMOVB
		case 2:
			return ppc64.AMOVH
		case 4:
			return ppc64.AMOVW
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad store type")
}

func ssaGenISEL(s *gc.SSAGenState, v *ssa.Value, cr int64, r1, r2 int16) {
	r := v.Reg()
	p := s.Prog(ppc64.AISEL)
	p.To.Type = obj.TYPE_REG
	p.To.Reg = r
	p.Reg = r1
	p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = cr
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpCopy:
		t := v.Type
		if t.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			rt := obj.TYPE_REG
			op := ppc64.AMOVD

			if t.IsFloat() {
				op = ppc64.AFMOVD
			}
			p := s.Prog(op)
			p.From.Type = rt
			p.From.Reg = x
			p.To.Type = rt
			p.To.Reg = y
		}

	case ssa.OpPPC64LoweredMuluhilo:
		// MULHDU Rarg1, Rarg0, Reg0
		// MULLD Rarg1, Rarg0, Reg1
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		p := s.Prog(ppc64.AMULHDU)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
		p1 := s.Prog(ppc64.AMULLD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.Reg = r0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = v.Reg1()

	case ssa.OpPPC64LoweredAtomicAnd8,
		ssa.OpPPC64LoweredAtomicOr8:
		// LWSYNC
		// LBAR (Rarg0), Rtmp
		// AND/OR Rarg1, Rtmp
		// STBCCC Rtmp, (Rarg0)
		// BNE -3(PC)
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		p := s.Prog(ppc64.ALBAR)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p1 := s.Prog(v.Op.Asm())
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = ppc64.REGTMP
		p2 := s.Prog(ppc64.ASTBCCC)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGTMP
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = r0
		p2.RegTo2 = ppc64.REGTMP
		p3 := s.Prog(ppc64.ABNE)
		p3.To.Type = obj.TYPE_BRANCH
		gc.Patch(p3, p)

	case ssa.OpPPC64LoweredAtomicAdd32,
		ssa.OpPPC64LoweredAtomicAdd64:
		// LWSYNC
		// LDAR/LWAR (Rarg0), Rout
		// ADD Rarg1, Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE -3(PC)
		// MOVW Rout,Rout (if Add32)
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// ADD reg1,out
		p1 := s.Prog(ppc64.AADD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = out
		p1.To.Type = obj.TYPE_REG
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = out
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)

		// Ensure a 32 bit result
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			p5 := s.Prog(ppc64.AMOVWZ)
			p5.To.Type = obj.TYPE_REG
			p5.To.Reg = out
			p5.From.Type = obj.TYPE_REG
			p5.From.Reg = out
		}

	case ssa.OpPPC64LoweredAtomicExchange32,
		ssa.OpPPC64LoweredAtomicExchange64:
		// LWSYNC
		// LDAR/LWAR (Rarg0), Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE -2(PC)
		// ISYNC
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// STDCCC or STWCCC
		p1 := s.Prog(st)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = r0
		// BNE retry
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		gc.Patch(p2, p)
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE

	case ssa.OpPPC64LoweredAtomicLoad32,
		ssa.OpPPC64LoweredAtomicLoad64,
		ssa.OpPPC64LoweredAtomicLoadPtr:
		// SYNC
		// MOVD/MOVW (Rarg0), Rout
		// CMP Rout,Rout
		// BNE 1(PC)
		// ISYNC
		ld := ppc64.AMOVD
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicLoad32 {
			ld = ppc64.AMOVW
			cmp = ppc64.ACMPW
		}
		arg0 := v.Args[0].Reg()
		out := v.Reg0()
		// SYNC when AuxInt == 1; otherwise, load-acquire
		if v.AuxInt == 1 {
			psync := s.Prog(ppc64.ASYNC)
			psync.To.Type = obj.TYPE_NONE
		}
		// Load
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = arg0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// CMP
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = out
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = out
		// BNE
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE
		gc.Patch(p2, pisync)

	case ssa.OpPPC64LoweredAtomicStore32,
		ssa.OpPPC64LoweredAtomicStore64:
		// SYNC or LWSYNC
		// MOVD/MOVW arg1,(arg0)
		st := ppc64.AMOVD
		if v.Op == ssa.OpPPC64LoweredAtomicStore32 {
			st = ppc64.AMOVW
		}
		arg0 := v.Args[0].Reg()
		arg1 := v.Args[1].Reg()
		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
		// SYNC
		syncOp := ppc64.ASYNC
		if v.AuxInt == 0 {
			syncOp = ppc64.ALWSYNC
		}
		psync := s.Prog(syncOp)
		psync.To.Type = obj.TYPE_NONE
		// Store
		p := s.Prog(st)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arg0
		p.From.Type = obj.TYPE_REG
		p.From.Reg = arg1

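	// Note: as in the atomic load and store cases above, v.AuxInt selects the
	// memory ordering: non-zero asks for sequential consistency, zero for the
	// weaker acquire/release form, which changes the barriers emitted below.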
	case ssa.OpPPC64LoweredAtomicCas64,
		ssa.OpPPC64LoweredAtomicCas32:
		// LWSYNC
		// loop:
		// LDAR (Rarg0), MutexHint, Rtmp
		// CMP Rarg1, Rtmp
		// BNE fail
		// STDCCC Rarg2, (Rarg0)
		// BNE loop
		// LWSYNC // Only for sequential consistency; not required in CasRel.
		// MOVD $1, Rout
		// BR end
		// fail:
		// MOVD $0, Rout
		// end:
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
			cmp = ppc64.ACMPW
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync1 := s.Prog(ppc64.ALWSYNC)
		plwsync1.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		// If it is a Compare-and-Swap-Release operation, set the EH field with
		// the release hint.
		if v.AuxInt == 0 {
			p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
		}
		// CMP reg1,reg2
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = ppc64.REGTMP
		p1.To.Type = obj.TYPE_REG
		// BNE cas_fail
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = r2
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
		// If the operation is a CAS-Release, then synchronization is not necessary.
		if v.AuxInt != 0 {
			plwsync2 := s.Prog(ppc64.ALWSYNC)
			plwsync2.To.Type = obj.TYPE_NONE
		}
		// return true
		p5 := s.Prog(ppc64.AMOVD)
		p5.From.Type = obj.TYPE_CONST
		p5.From.Offset = 1
		p5.To.Type = obj.TYPE_REG
		p5.To.Reg = out
		// BR done
		p6 := s.Prog(obj.AJMP)
		p6.To.Type = obj.TYPE_BRANCH
		// return false
		p7 := s.Prog(ppc64.AMOVD)
		p7.From.Type = obj.TYPE_CONST
		p7.From.Offset = 0
		p7.To.Type = obj.TYPE_REG
		p7.To.Reg = out
		gc.Patch(p2, p7)
		// done (label)
		p8 := s.Prog(obj.ANOP)
		gc.Patch(p6, p8)

	case ssa.OpPPC64LoweredGetClosurePtr:
		// Closure pointer is R11 (already)
		gc.CheckLoweredGetClosurePtr(v)

	case ssa.OpPPC64LoweredGetCallerSP:
		// caller's SP is FixedFrameSize below the address of the first arg
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize()
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredGetCallerPC:
		p := s.Prog(obj.AGETCALLERPC)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
		// input is already rounded

	case ssa.OpLoadReg:
		loadOp := loadByType(v.Type)
		p := s.Prog(loadOp)
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		storeOp := storeByType(v.Type)
		p := s.Prog(storeOp)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)

	case ssa.OpPPC64DIVD:
		// For now,
		//
		// cmp arg1, -1
		// be ahead
		// v = arg0 / arg1
		// b over
		// ahead: v = - arg0
		// over: nop
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64DIVW:
		// word-width version of above
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMPW)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		// r = r1*r2 ± r3
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r3
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64MaskIfNotCarry:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ADDconstForCarry:
		r1 := v.Args[0].Reg()
		p := s.Prog(v.Op.Asm())
		p.Reg = r1
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.

	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, ssa.OpPPC64FROUND:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()

	case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ANDCCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()

		if v.Aux != nil {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = gc.AuxOffset(v)
		} else {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
		}

		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // discard result

	case ssa.OpPPC64MOVDaddr:
		switch v.Aux.(type) {
		default:
			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
		case nil:
			// If aux offset and aux int are both 0, and the same
			// input and output regs are used, no instruction
			// needs to be generated, since it would just be
			// addi rx, rx, 0.
			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_ADDR
				p.From.Reg = v.Args[0].Reg()
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
				p.To.Reg = v.Reg()
			}

		case *obj.LSym, *gc.Node:
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			gc.AddAux(&p.From, v)

		}

	case ssa.OpPPC64MOVDconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()

	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
		// Shift in register to required size
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64MOVDload:

		// MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
		// For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
		// the offset is not known until link time. If the load of a go.string uses relocation for the
		// offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
		// To avoid this problem, the full address of the go.string is computed and loaded into the base register,
		// and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
		// go.string types because other types will have proper alignment.
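		// Concretely, instead of a single DS-form load whose relocated offset may
		// not be 4-aligned, the code below emits (sketch):
		//   MOVD $sym+off(Rarg0), Rout
		//   MOVD (Rout), Rout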

		gostring := false
		switch n := v.Aux.(type) {
		case *obj.LSym:
			gostring = strings.HasPrefix(n.Name, "go.string.")
		}
		if gostring {
			// Generate full addr of the go.string const
			// including AuxInt
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			// Load go.string using 0 offset
			p = s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			break
		}
		// Not a go.string, generate a normal load
		fallthrough

	case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64Equal,
		ssa.OpPPC64NotEqual,
		ssa.OpPPC64LessThan,
		ssa.OpPPC64FLessThan,
		ssa.OpPPC64LessEqual,
		ssa.OpPPC64GreaterThan,
		ssa.OpPPC64FGreaterThan,
		ssa.OpPPC64GreaterEqual:

		// On Power7 or later, can use isel instruction:
		// for a < b, a > b, a = b:
		//   rtmp := 1
		//   isel rt,rtmp,r0,cond // rt is target in ppc asm

		// for a >= b, a <= b, a != b:
		//   rtmp := 1
		//   isel rt,0,rtmp,!cond // rt is target in ppc asm
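		// iselRegs[0] is the zero register and iselRegs[1] (REGTMP) is loaded
		// with 1 just below, so passing iselRegs[iop.valueIfCond] as the
		// "condition true" operand of ISEL yields the required 0 or 1 result.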

		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = iselRegs[1]
		iop := iselOps[v.Op]
		ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])

	case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion
		ssa.OpPPC64FGreaterEqual:

		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = iselRegs[1]
		iop := iselOps[v.Op]
		ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
		ssaGenISEL(s, v, ppc64.C_COND_EQ, iselRegs[1], v.Reg())

	case ssa.OpPPC64LoweredZero:

		// unaligned data doesn't hurt performance
		// for these instructions on power8 or later

		// for sizes >= 64 generate a loop as follows:

		// set up loop counter in CTR, used by BC
		// MOVD len/32,REG_TMP
		// MOVD REG_TMP,CTR
		// loop:
		// MOVD R0,(R3)
		// MOVD R0,8(R3)
		// MOVD R0,16(R3)
		// MOVD R0,24(R3)
		// ADD $32,R3
		// BC 16, 0, loop
		//
		// any remainder is done as described below

		// for sizes < 64 bytes, first clear as many doublewords as possible,
		// then handle the remainder
		// MOVD R0,(R3)
		// MOVD R0,8(R3)
		// .... etc.
		//
		// the remainder bytes are cleared using one or more
		// of the following instructions with the appropriate
		// offsets depending on which instructions are needed
		//
		// MOVW R0,n1(R3) 4 bytes
		// MOVH R0,n2(R3) 2 bytes
		// MOVB R0,n3(R3) 1 byte
		//
		// 7 bytes: MOVW, MOVH, MOVB
		// 6 bytes: MOVW, MOVH
		// 5 bytes: MOVW, MOVB
		// 3 bytes: MOVH, MOVB

		// each loop iteration does 32 bytes
		ctr := v.AuxInt / 32

		// remainder bytes
		rem := v.AuxInt % 32

		// only generate a loop if there is more
		// than 1 iteration.
		if ctr > 1 {
			// Set up CTR loop counter
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// generate 4 MOVDs
			// when this is a loop then the top must be saved
			var top *obj.Prog
			for offset := int64(0); offset < 32; offset += 8 {
				// This is the top of loop
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_R0
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = v.Args[0].Reg()
				p.To.Offset = offset
				// Save the top of loop
				if top == nil {
					top = p
				}
			}

			// Increment address for the
			// 4 doublewords just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR
			// BC with BO_BCTR generates bdnz
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)
		}

		// when ctr == 1 the loop was not generated but
		// there are at least 32 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords
		if ctr == 1 {
			rem += 32
		}

		// clear the remainder starting at offset zero
		offset := int64(0)

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredMove:

		// This will be used when moving more
		// than 8 bytes. Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining. This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 32 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.
		// When moving >= 64 bytes a loop is used
		// MOVD len/32,REG_TMP
		// MOVD REG_TMP,CTR
		// top:
		// MOVD (R4),R7
		// MOVD 8(R4),R8
		// MOVD 16(R4),R9
		// MOVD 24(R4),R10
		// ADD R4,$32
		// MOVD R7,(R3)
		// MOVD R8,8(R3)
		// MOVD R9,16(R3)
		// MOVD R10,24(R3)
		// ADD R3,$32
		// BC 16,0,top
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		// MOVD n(R4),R7
		// MOVD R7,n(R3)
		// MOVW n1(R4),R7
		// MOVW R7,n1(R3)
		// MOVH n2(R4),R7
		// MOVH R7,n2(R3)
		// MOVB n3(R4),R7
		// MOVB R7,n3(R3)

		// Each loop iteration moves 32 bytes
		ctr := v.AuxInt / 32

		// Remainder after the loop
		rem := v.AuxInt % 32

		dst_reg := v.Args[0].Reg()
		src_reg := v.Args[1].Reg()

		// The set of registers used here must match the clobbered reg list
		// in PPC64Ops.go.
		useregs := []int16{ppc64.REG_R7, ppc64.REG_R8, ppc64.REG_R9, ppc64.REG_R10}
		offset := int64(0)

		// top of the loop
		var top *obj.Prog
		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Generate all the MOVDs for loads
			// based off the same register, increasing
			// the offset by 8 for each instruction
			for _, rg := range useregs {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = src_reg
				p.From.Offset = offset
				p.To.Type = obj.TYPE_REG
				p.To.Reg = rg
				if top == nil {
					top = p
				}
				offset += 8
			}
			// increment the src_reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = src_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = src_reg

			// generate the MOVDs for stores, based
			// off the same register, using the same
			// offsets as in the loads.
			offset = int64(0)
			for _, rg := range useregs {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = rg
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dst_reg
				p.To.Offset = offset
				offset += 8
			}
			// increment the dst_reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dst_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dst_reg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)

			// src_reg and dst_reg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add 32 bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += 32
		}

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R7
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R7
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64CALLstatic:
		s.Call(v)

	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_CTR

		if v.Args[0].Reg() != ppc64.REG_R12 {
			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
		}

		pp := s.Call(v)
		pp.To.Reg = ppc64.REG_CTR

		if gc.Ctxt.Flag_shared {
			// When compiling Go into PIC, the function we just
			// called via pointer might have been implemented in
			// a separate module and so overwritten the TOC
			// pointer in R2; reload it.
			q := s.Prog(ppc64.AMOVD)
			q.From.Type = obj.TYPE_MEM
			q.From.Offset = 24
			q.From.Reg = ppc64.REGSP
			q.To.Type = obj.TYPE_REG
			q.To.Reg = ppc64.REG_R2
		}

	case ssa.OpPPC64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpPPC64LoweredNilCheck:
		// Issue a load which will fault if arg is nil.
		p := s.Prog(ppc64.AMOVBZ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}

	case ssa.OpPPC64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpClobber:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

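// blockJump describes, for each conditional block kind, the branch used to jump
// to the "true" successor (asm) and the inverted branch used to jump to the
// "false" successor (invasm). For the floating-point kinds that fold in equality
// (FGE, FLE), asmeq requests an extra BEQ alongside asm and invasmun an extra
// BVS (branch on unordered) alongside invasm, since a NaN operand makes the
// comparison unordered.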
var blockJump = [...]struct {
	asm, invasm     obj.As
	asmeq, invasmun bool
}{
	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},

	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},

	// TODO: need to work FP comparisons into block jumps
	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockDefer:
		// defer returns in R3:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R3
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_R0

		p = s.Prog(ppc64.ABNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}

	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
			if jmp.invasmun {
				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
				s.Br(ppc64.ABVS, b.Succs[1].Block())
			}
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
			if jmp.asmeq {
				s.Br(ppc64.ABEQ, b.Succs[0].Block())
			}
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				if jmp.asmeq {
					s.Br(ppc64.ABEQ, b.Succs[0].Block())
				}
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				if jmp.invasmun {
					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
					s.Br(ppc64.ABVS, b.Succs[1].Block())
				}
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}