// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ppc64

import (
	"github.com/gagliardetto/golang-go/cmd/compile/internal/gc"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/logopt"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/ssa"
	"github.com/gagliardetto/golang-go/cmd/compile/internal/types"
	"github.com/gagliardetto/golang-go/cmd/internal/obj"
	"github.com/gagliardetto/golang-go/cmd/internal/obj/ppc64"
	"github.com/gagliardetto/golang-go/cmd/internal/objabi"
	"math"
	"strings"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			if t.IsSigned() {
				return ppc64.AMOVB
			} else {
				return ppc64.AMOVBZ
			}
		case 2:
			if t.IsSigned() {
				return ppc64.AMOVH
			} else {
				return ppc64.AMOVHZ
			}
		case 4:
			if t.IsSigned() {
				return ppc64.AMOVW
			} else {
				return ppc64.AMOVWZ
			}
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad load type")
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			return ppc64.AMOVB
		case 2:
			return ppc64.AMOVH
		case 4:
			return ppc64.AMOVW
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad store type")
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpCopy:
		t := v.Type
		if t.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			rt := obj.TYPE_REG
			op := ppc64.AMOVD

			if t.IsFloat() {
				op = ppc64.AFMOVD
			}
			p := s.Prog(op)
			p.From.Type = rt
			p.From.Reg = x
			p.To.Type = rt
			p.To.Reg = y
		}

	case ssa.OpPPC64LoweredMuluhilo:
		// MULHDU	Rarg1, Rarg0, Reg0
		// MULLD	Rarg1, Rarg0, Reg1
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		p := s.Prog(ppc64.AMULHDU)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
		p1 := s.Prog(ppc64.AMULLD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.Reg = r0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = v.Reg1()

	case ssa.OpPPC64LoweredAdd64Carry:
		// ADDC		Rarg2, -1, Rtmp
		// ADDE		Rarg1, Rarg0, Reg0
		// ADDZE	Rzero, Reg1
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		p := s.Prog(ppc64.AADDC)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = -1
		p.Reg = r2
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p1 := s.Prog(ppc64.AADDE)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.Reg = r0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = v.Reg0()
		p2 := s.Prog(ppc64.AADDZE)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGZERO
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = v.Reg1()

	case ssa.OpPPC64LoweredAtomicAnd8,
		ssa.OpPPC64LoweredAtomicOr8:
		// LWSYNC
		// LBAR		(Rarg0), Rtmp
		// AND/OR	Rarg1, Rtmp
		// STBCCC	Rtmp, (Rarg0)
		// BNE		-3(PC)
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		p := s.Prog(ppc64.ALBAR)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p1 := s.Prog(v.Op.Asm())
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = ppc64.REGTMP
		p2 := s.Prog(ppc64.ASTBCCC)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGTMP
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = r0
		p2.RegTo2 = ppc64.REGTMP
		p3 := s.Prog(ppc64.ABNE)
		p3.To.Type = obj.TYPE_BRANCH
		gc.Patch(p3, p)

	case ssa.OpPPC64LoweredAtomicAdd32,
		ssa.OpPPC64LoweredAtomicAdd64:
		// LWSYNC
		// LDAR/LWAR	(Rarg0), Rout
		// ADD		Rarg1, Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE		-3(PC)
		// MOVW		Rout,Rout (if Add32)
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// ADD reg1,out
		p1 := s.Prog(ppc64.AADD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = out
		p1.To.Type = obj.TYPE_REG
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = out
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)

		// Ensure a 32 bit result
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			p5 := s.Prog(ppc64.AMOVWZ)
			p5.To.Type = obj.TYPE_REG
			p5.To.Reg = out
			p5.From.Type = obj.TYPE_REG
			p5.From.Reg = out
		}

	case ssa.OpPPC64LoweredAtomicExchange32,
		ssa.OpPPC64LoweredAtomicExchange64:
		// LWSYNC
		// LDAR/LWAR	(Rarg0), Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE		-2(PC)
		// ISYNC
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// STDCCC or STWCCC
		p1 := s.Prog(st)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = r0
		// BNE retry
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		gc.Patch(p2, p)
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE

	case ssa.OpPPC64LoweredAtomicLoad8,
		ssa.OpPPC64LoweredAtomicLoad32,
		ssa.OpPPC64LoweredAtomicLoad64,
		ssa.OpPPC64LoweredAtomicLoadPtr:
		// SYNC
		// MOVB/MOVD/MOVW (Rarg0), Rout
		// CMP Rout,Rout
		// BNE 1(PC)
		// ISYNC
		ld := ppc64.AMOVD
		cmp := ppc64.ACMP
		switch v.Op {
		case ssa.OpPPC64LoweredAtomicLoad8:
			ld = ppc64.AMOVBZ
		case ssa.OpPPC64LoweredAtomicLoad32:
			ld = ppc64.AMOVWZ
			cmp = ppc64.ACMPW
		}
		arg0 := v.Args[0].Reg()
		out := v.Reg0()
		// SYNC when AuxInt == 1; otherwise, load-acquire
		if v.AuxInt == 1 {
			psync := s.Prog(ppc64.ASYNC)
			psync.To.Type = obj.TYPE_NONE
		}
		// Load
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = arg0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// CMP
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = out
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = out
		// BNE
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE
		gc.Patch(p2, pisync)

	case ssa.OpPPC64LoweredAtomicStore8,
		ssa.OpPPC64LoweredAtomicStore32,
		ssa.OpPPC64LoweredAtomicStore64:
		// SYNC or LWSYNC
		// MOVB/MOVW/MOVD arg1,(arg0)
		st := ppc64.AMOVD
		switch v.Op {
		case ssa.OpPPC64LoweredAtomicStore8:
			st = ppc64.AMOVB
		case ssa.OpPPC64LoweredAtomicStore32:
			st = ppc64.AMOVW
		}
		arg0 := v.Args[0].Reg()
		arg1 := v.Args[1].Reg()
		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
		// SYNC
		syncOp := ppc64.ASYNC
		if v.AuxInt == 0 {
			syncOp = ppc64.ALWSYNC
		}
		psync := s.Prog(syncOp)
		psync.To.Type = obj.TYPE_NONE
		// Store
		p := s.Prog(st)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arg0
		p.From.Type = obj.TYPE_REG
		p.From.Reg = arg1

	case ssa.OpPPC64LoweredAtomicCas64,
		ssa.OpPPC64LoweredAtomicCas32:
		// LWSYNC
		// loop:
		// LDAR	(Rarg0), MutexHint, Rtmp
		// CMP	Rarg1, Rtmp
		// BNE	fail
		// STDCCC Rarg2, (Rarg0)
		// BNE	loop
		// LWSYNC	// Only for sequential consistency; not required in CasRel.
		// MOVD	$1, Rout
		// BR	end
		// fail:
		// MOVD	$0, Rout
		// end:
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
			cmp = ppc64.ACMPW
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync1 := s.Prog(ppc64.ALWSYNC)
		plwsync1.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		// If it is a Compare-and-Swap-Release operation, set the EH field with
		// the release hint.
		if v.AuxInt == 0 {
			p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
		}
		// CMP reg1,reg2
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = ppc64.REGTMP
		p1.To.Type = obj.TYPE_REG
		// BNE cas_fail
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = r2
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
		// If the operation is a CAS-Release, then synchronization is not necessary.
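		// That is: AuxInt != 0 selects the sequentially consistent CAS and emits
		// the trailing LWSYNC below; AuxInt == 0 is the CasRel form and skips it.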
		if v.AuxInt != 0 {
			plwsync2 := s.Prog(ppc64.ALWSYNC)
			plwsync2.To.Type = obj.TYPE_NONE
		}
		// return true
		p5 := s.Prog(ppc64.AMOVD)
		p5.From.Type = obj.TYPE_CONST
		p5.From.Offset = 1
		p5.To.Type = obj.TYPE_REG
		p5.To.Reg = out
		// BR done
		p6 := s.Prog(obj.AJMP)
		p6.To.Type = obj.TYPE_BRANCH
		// return false
		p7 := s.Prog(ppc64.AMOVD)
		p7.From.Type = obj.TYPE_CONST
		p7.From.Offset = 0
		p7.To.Type = obj.TYPE_REG
		p7.To.Reg = out
		gc.Patch(p2, p7)
		// done (label)
		p8 := s.Prog(obj.ANOP)
		gc.Patch(p6, p8)

	case ssa.OpPPC64LoweredGetClosurePtr:
		// Closure pointer is R11 (already)
		gc.CheckLoweredGetClosurePtr(v)

	case ssa.OpPPC64LoweredGetCallerSP:
		// caller's SP is FixedFrameSize below the address of the first arg
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize()
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredGetCallerPC:
		p := s.Prog(obj.AGETCALLERPC)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
		// input is already rounded

	case ssa.OpLoadReg:
		loadOp := loadByType(v.Type)
		p := s.Prog(loadOp)
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		storeOp := storeByType(v.Type)
		p := s.Prog(storeOp)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)

	case ssa.OpPPC64DIVD:
		// For now,
		//
		// cmp arg1, -1
		// be  ahead
		// v = arg0 / arg1
		// b over
		// ahead: v = - arg0
		// over: nop
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64DIVW:
		// word-width version of above
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMPW)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
		ssa.OpPPC64ROTL,
		ssa.OpPPC64ROTLW,
		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC:
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // result is not needed

	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		// r = r1*r2 ± r3
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r3
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64MaskIfNotCarry:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ADDconstForCarry:
		r1 := v.Args[0].Reg()
		p := s.Prog(v.Op.Asm())
		p.Reg = r1
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
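
	// The two cases above work as a pair: ADDconstForCarry is emitted only for
	// its effect on the carry (CA) bit, and MaskIfNotCarry then turns that carry
	// into a register-wide mask (all ones when the add did not carry, zero when
	// it did). The lowering rules use this, e.g. for out-of-range shift amounts,
	// to build masks without a branch.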

	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
		ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
		ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
		ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()

	case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ANDCCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()

		if v.Aux != nil {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = gc.AuxOffset(v)
		} else {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
		}

		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // discard result

	case ssa.OpPPC64MOVDaddr:
		switch v.Aux.(type) {
		default:
			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
		case nil:
			// If aux offset and aux int are both 0, and the same
			// input and output regs are used, no instruction
			// needs to be generated, since it would just be
			// addi rx, rx, 0.
			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_ADDR
				p.From.Reg = v.Args[0].Reg()
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
				p.To.Reg = v.Reg()
			}

		case *obj.LSym, *gc.Node:
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			gc.AddAux(&p.From, v)

		}

	case ssa.OpPPC64MOVDconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()

	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
		// Shift in register to required size
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64MOVDload:

		// MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
		// For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
		// the offset is not known until link time. If the load of a go.string uses relocation for the
		// offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
		// To avoid this problem, the full address of the go.string is computed and loaded into the base register,
		// and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
		// go.string types because other types will have proper alignment.

		gostring := false
		switch n := v.Aux.(type) {
		case *obj.LSym:
			gostring = strings.HasPrefix(n.Name, "go.string.")
		}
		if gostring {
			// Generate full addr of the go.string const
			// including AuxInt
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			// Load go.string using 0 offset
			p = s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			break
		}
		// Not a go.string, generate a normal load
		fallthrough

	case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB:
		// ISEL, ISELB
		// AuxInt value indicates condition: 0=LT 1=GT 2=EQ 4=GE 5=LE 6=NE
		// ISEL only accepts 0, 1, 2 condition values but the others can be
		// achieved by swapping operand order.
		// arg0 ? arg1 : arg2 with conditions LT, GT, EQ
		// arg0 ? arg2 : arg1 for conditions GE, LE, NE
		// ISELB is used when a boolean result is needed, returning 0 or 1
		p := s.Prog(ppc64.AISEL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		// For ISELB, boolean result 0 or 1. Use R0 for 0 operand to avoid load.
		r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0}
		if v.Op == ssa.OpPPC64ISEL {
			r.Reg = v.Args[1].Reg()
		}
		// AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2
		if v.AuxInt > 3 {
			p.Reg = r.Reg
			p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
		} else {
			p.Reg = v.Args[0].Reg()
			p.SetFrom3(r)
		}
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt & 3

	case ssa.OpPPC64LoweredZero:

		// unaligned data doesn't hurt performance
		// for these instructions on power8 or later

		// for sizes >= 64 generate a loop as follows:

		// set up loop counter in CTR, used by BC
		//	XXLXOR VS32,VS32,VS32
		//	MOVD len/32,REG_TMP
		//	MOVD REG_TMP,CTR
		//	MOVD $16,REG_TMP
		// loop:
		//	STXVD2X VS32,(R0)(R3)
		//	STXVD2X VS32,(R31)(R3)
		//	ADD $32,R3
		//	BC 16, 0, loop
		//
		// any remainder is done as described below

		// for sizes < 64 bytes, first clear as many doublewords as possible,
		// then handle the remainder
		//	MOVD R0,(R3)
		//	MOVD R0,8(R3)
		//	.... etc.
		//
		// the remainder bytes are cleared using one or more
		// of the following instructions with the appropriate
		// offsets depending which instructions are needed
		//
		//	MOVW R0,n1(R3)	4 bytes
		//	MOVH R0,n2(R3)	2 bytes
		//	MOVB R0,n3(R3)	1 byte
		//
		// 7 bytes: MOVW, MOVH, MOVB
		// 6 bytes: MOVW, MOVH
		// 5 bytes: MOVW, MOVB
		// 3 bytes: MOVH, MOVB

		// each loop iteration does 32 bytes
		ctr := v.AuxInt / 32

		// remainder bytes
		rem := v.AuxInt % 32

		// only generate a loop if there is more
		// than 1 iteration.
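		// For example, zeroing 100 bytes emits a 3-iteration loop (96 bytes)
		// followed by a single MOVW for the last 4 bytes, while 23 bytes skip
		// the loop entirely: two MOVDs, then MOVW, MOVH and MOVB.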
		if ctr > 1 {
			// Set up VS32 (V0) to hold 0s
			p := s.Prog(ppc64.AXXLXOR)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32
			p.Reg = ppc64.REG_VS32

			// Set up CTR loop counter
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Set up R31 to hold index value 16
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			// generate 2 STXVD2Xs to store 16 bytes
			// when this is a loop then the top must be saved
			var top *obj.Prog
			// This is the top of loop
			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Index = ppc64.REGZERO
			// Save the top of loop
			if top == nil {
				top = p
			}

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Index = ppc64.REGTMP

			// Increment address for the
			// 4 doublewords just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR
			// BC with BO_BCTR generates bdnz
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)
		}

		// when ctr == 1 the loop was not generated but
		// there are at least 32 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords
		if ctr == 1 {
			rem += 32
		}

		// clear the remainder starting at offset zero
		offset := int64(0)

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredMove:

		// This will be used when moving more
		// than 8 bytes. Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining. This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 32 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.
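		// For example, a 41-byte move (less than 64, so no loop) emits two
		// LXVD2X/STXVD2X pairs for bytes 0-31, then a MOVD pair for bytes
		// 32-39 and a MOVB pair for the final byte.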
		// When moving >= 64 bytes a loop is used
		//	MOVD len/32,REG_TMP
		//	MOVD REG_TMP,CTR
		//	MOVD $16,REG_TMP
		// top:
		//	LXVD2X (R0)(R4),VS32
		//	LXVD2X (R31)(R4),VS33
		//	ADD $32,R4
		//	STXVD2X VS32,(R0)(R3)
		//	STXVD2X VS33,(R31)(R3)
		//	ADD $32,R3
		//	BC 16,0,top
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		//	MOVD  n(R4),R14
		//	MOVD  R14,n(R3)
		//	MOVW  n1(R4),R14
		//	MOVW  R14,n1(R3)
		//	MOVH  n2(R4),R14
		//	MOVH  R14,n2(R3)
		//	MOVB  n3(R4),R14
		//	MOVB  R14,n3(R3)

		// Each loop iteration moves 32 bytes
		ctr := v.AuxInt / 32

		// Remainder after the loop
		rem := v.AuxInt % 32

		dst_reg := v.Args[0].Reg()
		src_reg := v.Args[1].Reg()

		// The set of registers used here, must match the clobbered reg list
		// in PPC64Ops.go.
		offset := int64(0)

		// top of the loop
		var top *obj.Prog
		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Use REGTMP as index reg
			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 16
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			// Generate 16 byte loads and stores.
			// Use temp register for index (16)
			// on the second one.
			p = s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Index = ppc64.REGZERO
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			if top == nil {
				top = p
			}

			p = s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Index = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS33

			// increment the src reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = src_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = src_reg

			// generate 16 byte stores
			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Index = ppc64.REGZERO

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS33
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Index = ppc64.REGTMP

			// increment the dst reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dst_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dst_reg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)

			// src_reg and dst_reg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add 32 bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += 32
		}

		if rem >= 16 {
			// Generate 16 byte loads and stores.
			// Use temp register for index (value 16)
			// on the second one.
			p := s.Prog(ppc64.ALXVD2X)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Index = ppc64.REGZERO
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_VS32

			p = s.Prog(ppc64.ASTXVD2X)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_VS32
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Index = ppc64.REGZERO

			offset = 16
			rem -= 16

			if rem >= 16 {
				// Use REGTMP as index reg
				p = s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 16
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REGTMP

				// Generate 16 byte loads and stores.
				// Use temp register for index (16)
				// on the second one.
				p = s.Prog(ppc64.ALXVD2X)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = src_reg
				p.From.Index = ppc64.REGTMP
				p.To.Type = obj.TYPE_REG
				p.To.Reg = ppc64.REG_VS32

				p = s.Prog(ppc64.ASTXVD2X)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_VS32
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dst_reg
				p.To.Index = ppc64.REGTMP

				offset = 32
				rem -= 16
			}
		}

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R14
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R14
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64CALLstatic:
		s.Call(v)

	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_LR

		if v.Args[0].Reg() != ppc64.REG_R12 {
			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
		}

		pp := s.Call(v)
		pp.To.Reg = ppc64.REG_LR

		if gc.Ctxt.Flag_shared {
			// When compiling Go into PIC, the function we just
			// called via pointer might have been implemented in
			// a separate module and so overwritten the TOC
			// pointer in R2; reload it.
			q := s.Prog(ppc64.AMOVD)
			q.From.Type = obj.TYPE_MEM
			q.From.Offset = 24
			q.From.Reg = ppc64.REGSP
			q.To.Type = obj.TYPE_REG
			q.To.Reg = ppc64.REG_R2
		}

	case ssa.OpPPC64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(16) // space used in callee args area by assembly stubs

	case ssa.OpPPC64LoweredNilCheck:
		if objabi.GOOS == "aix" {
			// CMP Rarg0, R0
			// BNE 2(PC)
			// STW R0, 0(R0)
			// NOP (so the BNE has somewhere to land)

			// CMP Rarg0, R0
			p := s.Prog(ppc64.ACMP)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R0

			// BNE 2(PC)
			p2 := s.Prog(ppc64.ABNE)
			p2.To.Type = obj.TYPE_BRANCH

			// STW R0, 0(R0)
			// Write at 0 is forbidden and will trigger a SIGSEGV
			p = s.Prog(ppc64.AMOVW)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = ppc64.REG_R0

			// NOP (so the BNE has somewhere to land)
			nop := s.Prog(obj.ANOP)
			gc.Patch(p2, nop)

		} else {
			// Issue a load which will fault if arg is nil.
			p := s.Prog(ppc64.AMOVBZ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
		}
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}

	// These should be resolved by rules and not make it here.
	case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan,
		ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual,
		ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual:
		v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString())
	case ssa.OpPPC64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpClobber:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm     obj.As
	asmeq, invasmun bool
}{
	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},

	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},

	// TODO: need to work FP comparisons into block jumps
	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockDefer:
		// defer returns in R3:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R3
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_R0

		p = s.Prog(ppc64.ABNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}

	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
			if jmp.invasmun {
				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
				s.Br(ppc64.ABVS, b.Succs[1].Block())
			}
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
			if jmp.asmeq {
				s.Br(ppc64.ABEQ, b.Succs[0].Block())
			}
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				if jmp.asmeq {
					s.Br(ppc64.ABEQ, b.Succs[0].Block())
				}
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				if jmp.invasmun {
					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
					s.Br(ppc64.ABVS, b.Succs[1].Block())
				}
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}