github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/ppc64/ssa.go 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package ppc64 6 7 import ( 8 "github.com/bir3/gocompiler/src/cmd/compile/internal/base" 9 "github.com/bir3/gocompiler/src/cmd/compile/internal/ir" 10 "github.com/bir3/gocompiler/src/cmd/compile/internal/logopt" 11 "github.com/bir3/gocompiler/src/cmd/compile/internal/objw" 12 "github.com/bir3/gocompiler/src/cmd/compile/internal/ssa" 13 "github.com/bir3/gocompiler/src/cmd/compile/internal/ssagen" 14 "github.com/bir3/gocompiler/src/cmd/compile/internal/types" 15 "github.com/bir3/gocompiler/src/cmd/internal/obj" 16 "github.com/bir3/gocompiler/src/cmd/internal/obj/ppc64" 17 "github.com/bir3/gocompiler/src/internal/buildcfg" 18 "math" 19 "strings" 20 ) 21 22 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags. 23 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) { 24 // flive := b.FlagsLiveAtEnd 25 // if b.Control != nil && b.Control.Type.IsFlags() { 26 // flive = true 27 // } 28 // for i := len(b.Values) - 1; i >= 0; i-- { 29 // v := b.Values[i] 30 // if flive && (v.Op == ssa.OpPPC64MOVDconst) { 31 // // The "mark" is any non-nil Aux value. 32 // v.Aux = v 33 // } 34 // if v.Type.IsFlags() { 35 // flive = false 36 // } 37 // for _, a := range v.Args { 38 // if a.Type.IsFlags() { 39 // flive = true 40 // } 41 // } 42 // } 43 } 44 45 // loadByType returns the load instruction of the given type. 46 func loadByType(t *types.Type) obj.As { 47 if t.IsFloat() { 48 switch t.Size() { 49 case 4: 50 return ppc64.AFMOVS 51 case 8: 52 return ppc64.AFMOVD 53 } 54 } else { 55 switch t.Size() { 56 case 1: 57 if t.IsSigned() { 58 return ppc64.AMOVB 59 } else { 60 return ppc64.AMOVBZ 61 } 62 case 2: 63 if t.IsSigned() { 64 return ppc64.AMOVH 65 } else { 66 return ppc64.AMOVHZ 67 } 68 case 4: 69 if t.IsSigned() { 70 return ppc64.AMOVW 71 } else { 72 return ppc64.AMOVWZ 73 } 74 case 8: 75 return ppc64.AMOVD 76 } 77 } 78 panic("bad load type") 79 } 80 81 // storeByType returns the store instruction of the given type.
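// Stores never need sign or zero extension, so only the size matters; for example, both int16 and uint16 values are stored with MOVH.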
82 func storeByType(t *types.Type) obj.As { 83 if t.IsFloat() { 84 switch t.Size() { 85 case 4: 86 return ppc64.AFMOVS 87 case 8: 88 return ppc64.AFMOVD 89 } 90 } else { 91 switch t.Size() { 92 case 1: 93 return ppc64.AMOVB 94 case 2: 95 return ppc64.AMOVH 96 case 4: 97 return ppc64.AMOVW 98 case 8: 99 return ppc64.AMOVD 100 } 101 } 102 panic("bad store type") 103 } 104 105 func ssaGenValue(s *ssagen.State, v *ssa.Value) { 106 switch v.Op { 107 case ssa.OpCopy: 108 t := v.Type 109 if t.IsMemory() { 110 return 111 } 112 x := v.Args[0].Reg() 113 y := v.Reg() 114 if x != y { 115 rt := obj.TYPE_REG 116 op := ppc64.AMOVD 117 118 if t.IsFloat() { 119 op = ppc64.AFMOVD 120 } 121 p := s.Prog(op) 122 p.From.Type = rt 123 p.From.Reg = x 124 p.To.Type = rt 125 p.To.Reg = y 126 } 127 128 case ssa.OpPPC64LoweredAtomicAnd8, 129 ssa.OpPPC64LoweredAtomicAnd32, 130 ssa.OpPPC64LoweredAtomicOr8, 131 ssa.OpPPC64LoweredAtomicOr32: 132 // LWSYNC 133 // LBAR/LWAR (Rarg0), Rtmp 134 // AND/OR Rarg1, Rtmp 135 // STBCCC/STWCCC Rtmp, (Rarg0) 136 // BNE -3(PC) 137 ld := ppc64.ALBAR 138 st := ppc64.ASTBCCC 139 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 { 140 ld = ppc64.ALWAR 141 st = ppc64.ASTWCCC 142 } 143 r0 := v.Args[0].Reg() 144 r1 := v.Args[1].Reg() 145 // LWSYNC - Assuming shared data not write-through-required nor 146 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 147 plwsync := s.Prog(ppc64.ALWSYNC) 148 plwsync.To.Type = obj.TYPE_NONE 149 // LBAR or LWAR 150 p := s.Prog(ld) 151 p.From.Type = obj.TYPE_MEM 152 p.From.Reg = r0 153 p.To.Type = obj.TYPE_REG 154 p.To.Reg = ppc64.REGTMP 155 // AND/OR reg1,out 156 p1 := s.Prog(v.Op.Asm()) 157 p1.From.Type = obj.TYPE_REG 158 p1.From.Reg = r1 159 p1.To.Type = obj.TYPE_REG 160 p1.To.Reg = ppc64.REGTMP 161 // STBCCC or STWCCC 162 p2 := s.Prog(st) 163 p2.From.Type = obj.TYPE_REG 164 p2.From.Reg = ppc64.REGTMP 165 p2.To.Type = obj.TYPE_MEM 166 p2.To.Reg = r0 167 p2.RegTo2 = ppc64.REGTMP 168 // BNE retry 169 p3 := s.Prog(ppc64.ABNE) 170 p3.To.Type = obj.TYPE_BRANCH 171 p3.To.SetTarget(p) 172 173 case ssa.OpPPC64LoweredAtomicAdd32, 174 ssa.OpPPC64LoweredAtomicAdd64: 175 // LWSYNC 176 // LDAR/LWAR (Rarg0), Rout 177 // ADD Rarg1, Rout 178 // STDCCC/STWCCC Rout, (Rarg0) 179 // BNE -3(PC) 180 // MOVW Rout,Rout (if Add32) 181 ld := ppc64.ALDAR 182 st := ppc64.ASTDCCC 183 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 { 184 ld = ppc64.ALWAR 185 st = ppc64.ASTWCCC 186 } 187 r0 := v.Args[0].Reg() 188 r1 := v.Args[1].Reg() 189 out := v.Reg0() 190 // LWSYNC - Assuming shared data not write-through-required nor 191 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 
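// A lwsync rather than a full sync appears to be sufficient here because the larx/stcx. pair below already provides the atomic update; sync would only be needed for the storage classes excluded above.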
192 plwsync := s.Prog(ppc64.ALWSYNC) 193 plwsync.To.Type = obj.TYPE_NONE 194 // LDAR or LWAR 195 p := s.Prog(ld) 196 p.From.Type = obj.TYPE_MEM 197 p.From.Reg = r0 198 p.To.Type = obj.TYPE_REG 199 p.To.Reg = out 200 // ADD reg1,out 201 p1 := s.Prog(ppc64.AADD) 202 p1.From.Type = obj.TYPE_REG 203 p1.From.Reg = r1 204 p1.To.Reg = out 205 p1.To.Type = obj.TYPE_REG 206 // STDCCC or STWCCC 207 p3 := s.Prog(st) 208 p3.From.Type = obj.TYPE_REG 209 p3.From.Reg = out 210 p3.To.Type = obj.TYPE_MEM 211 p3.To.Reg = r0 212 // BNE retry 213 p4 := s.Prog(ppc64.ABNE) 214 p4.To.Type = obj.TYPE_BRANCH 215 p4.To.SetTarget(p) 216 217 // Ensure a 32 bit result 218 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 { 219 p5 := s.Prog(ppc64.AMOVWZ) 220 p5.To.Type = obj.TYPE_REG 221 p5.To.Reg = out 222 p5.From.Type = obj.TYPE_REG 223 p5.From.Reg = out 224 } 225 226 case ssa.OpPPC64LoweredAtomicExchange32, 227 ssa.OpPPC64LoweredAtomicExchange64: 228 // LWSYNC 229 // LDAR/LWAR (Rarg0), Rout 230 // STDCCC/STWCCC Rout, (Rarg0) 231 // BNE -2(PC) 232 // ISYNC 233 ld := ppc64.ALDAR 234 st := ppc64.ASTDCCC 235 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 { 236 ld = ppc64.ALWAR 237 st = ppc64.ASTWCCC 238 } 239 r0 := v.Args[0].Reg() 240 r1 := v.Args[1].Reg() 241 out := v.Reg0() 242 // LWSYNC - Assuming shared data not write-through-required nor 243 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 244 plwsync := s.Prog(ppc64.ALWSYNC) 245 plwsync.To.Type = obj.TYPE_NONE 246 // LDAR or LWAR 247 p := s.Prog(ld) 248 p.From.Type = obj.TYPE_MEM 249 p.From.Reg = r0 250 p.To.Type = obj.TYPE_REG 251 p.To.Reg = out 252 // STDCCC or STWCCC 253 p1 := s.Prog(st) 254 p1.From.Type = obj.TYPE_REG 255 p1.From.Reg = r1 256 p1.To.Type = obj.TYPE_MEM 257 p1.To.Reg = r0 258 // BNE retry 259 p2 := s.Prog(ppc64.ABNE) 260 p2.To.Type = obj.TYPE_BRANCH 261 p2.To.SetTarget(p) 262 // ISYNC 263 pisync := s.Prog(ppc64.AISYNC) 264 pisync.To.Type = obj.TYPE_NONE 265 266 case ssa.OpPPC64LoweredAtomicLoad8, 267 ssa.OpPPC64LoweredAtomicLoad32, 268 ssa.OpPPC64LoweredAtomicLoad64, 269 ssa.OpPPC64LoweredAtomicLoadPtr: 270 // SYNC 271 // MOVB/MOVD/MOVW (Rarg0), Rout 272 // CMP Rout,Rout 273 // BNE 1(PC) 274 // ISYNC 275 ld := ppc64.AMOVD 276 cmp := ppc64.ACMP 277 switch v.Op { 278 case ssa.OpPPC64LoweredAtomicLoad8: 279 ld = ppc64.AMOVBZ 280 case ssa.OpPPC64LoweredAtomicLoad32: 281 ld = ppc64.AMOVWZ 282 cmp = ppc64.ACMPW 283 } 284 arg0 := v.Args[0].Reg() 285 out := v.Reg0() 286 // SYNC when AuxInt == 1; otherwise, load-acquire 287 if v.AuxInt == 1 { 288 psync := s.Prog(ppc64.ASYNC) 289 psync.To.Type = obj.TYPE_NONE 290 } 291 // Load 292 p := s.Prog(ld) 293 p.From.Type = obj.TYPE_MEM 294 p.From.Reg = arg0 295 p.To.Type = obj.TYPE_REG 296 p.To.Reg = out 297 // CMP 298 p1 := s.Prog(cmp) 299 p1.From.Type = obj.TYPE_REG 300 p1.From.Reg = out 301 p1.To.Type = obj.TYPE_REG 302 p1.To.Reg = out 303 // BNE 304 p2 := s.Prog(ppc64.ABNE) 305 p2.To.Type = obj.TYPE_BRANCH 306 // ISYNC 307 pisync := s.Prog(ppc64.AISYNC) 308 pisync.To.Type = obj.TYPE_NONE 309 p2.To.SetTarget(pisync) 310 311 case ssa.OpPPC64LoweredAtomicStore8, 312 ssa.OpPPC64LoweredAtomicStore32, 313 ssa.OpPPC64LoweredAtomicStore64: 314 // SYNC or LWSYNC 315 // MOVB/MOVW/MOVD arg1,(arg0) 316 st := ppc64.AMOVD 317 switch v.Op { 318 case ssa.OpPPC64LoweredAtomicStore8: 319 st = ppc64.AMOVB 320 case ssa.OpPPC64LoweredAtomicStore32: 321 st = ppc64.AMOVW 322 } 323 arg0 := v.Args[0].Reg() 324 arg1 := v.Args[1].Reg() 325 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC 326 // SYNC 327 syncOp := ppc64.ASYNC 328 
if v.AuxInt == 0 { 329 syncOp = ppc64.ALWSYNC 330 } 331 psync := s.Prog(syncOp) 332 psync.To.Type = obj.TYPE_NONE 333 // Store 334 p := s.Prog(st) 335 p.To.Type = obj.TYPE_MEM 336 p.To.Reg = arg0 337 p.From.Type = obj.TYPE_REG 338 p.From.Reg = arg1 339 340 case ssa.OpPPC64LoweredAtomicCas64, 341 ssa.OpPPC64LoweredAtomicCas32: 342 // MOVD $0, Rout 343 // LWSYNC 344 // loop: 345 // LDAR (Rarg0), MutexHint, Rtmp 346 // CMP Rarg1, Rtmp 347 // BNE end 348 // STDCCC Rarg2, (Rarg0) 349 // BNE loop 350 // LWSYNC // Only for sequential consistency; not required in CasRel. 351 // MOVD $1, Rout 352 // end: 353 ld := ppc64.ALDAR 354 st := ppc64.ASTDCCC 355 cmp := ppc64.ACMP 356 if v.Op == ssa.OpPPC64LoweredAtomicCas32 { 357 ld = ppc64.ALWAR 358 st = ppc64.ASTWCCC 359 cmp = ppc64.ACMPW 360 } 361 r0 := v.Args[0].Reg() 362 r1 := v.Args[1].Reg() 363 r2 := v.Args[2].Reg() 364 out := v.Reg0() 365 // Initialize return value to false 366 p := s.Prog(ppc64.AMOVD) 367 p.From.Type = obj.TYPE_CONST 368 p.From.Offset = 0 369 p.To.Type = obj.TYPE_REG 370 p.To.Reg = out 371 // LWSYNC - Assuming shared data not write-through-required nor 372 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 373 plwsync1 := s.Prog(ppc64.ALWSYNC) 374 plwsync1.To.Type = obj.TYPE_NONE 375 // LDAR or LWAR 376 p0 := s.Prog(ld) 377 p0.From.Type = obj.TYPE_MEM 378 p0.From.Reg = r0 379 p0.To.Type = obj.TYPE_REG 380 p0.To.Reg = ppc64.REGTMP 381 // If it is a Compare-and-Swap-Release operation, set the EH field with 382 // the release hint. 383 if v.AuxInt == 0 { 384 p0.SetFrom3Const(0) 385 } 386 // CMP reg1,reg2 387 p1 := s.Prog(cmp) 388 p1.From.Type = obj.TYPE_REG 389 p1.From.Reg = r1 390 p1.To.Reg = ppc64.REGTMP 391 p1.To.Type = obj.TYPE_REG 392 // BNE done with return value = false 393 p2 := s.Prog(ppc64.ABNE) 394 p2.To.Type = obj.TYPE_BRANCH 395 // STDCCC or STWCCC 396 p3 := s.Prog(st) 397 p3.From.Type = obj.TYPE_REG 398 p3.From.Reg = r2 399 p3.To.Type = obj.TYPE_MEM 400 p3.To.Reg = r0 401 // BNE retry 402 p4 := s.Prog(ppc64.ABNE) 403 p4.To.Type = obj.TYPE_BRANCH 404 p4.To.SetTarget(p0) 405 // LWSYNC - Assuming shared data not write-through-required nor 406 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b. 407 // If the operation is a CAS-Release, then synchronization is not necessary. 
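// v.AuxInt != 0 requests a sequentially consistent CAS, so the trailing LWSYNC is emitted; v.AuxInt == 0 is the CasRel form, for which the EH release hint was already set on the larx above and this barrier is skipped.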
408 if v.AuxInt != 0 { 409 plwsync2 := s.Prog(ppc64.ALWSYNC) 410 plwsync2.To.Type = obj.TYPE_NONE 411 } 412 // return value true 413 p5 := s.Prog(ppc64.AMOVD) 414 p5.From.Type = obj.TYPE_CONST 415 p5.From.Offset = 1 416 p5.To.Type = obj.TYPE_REG 417 p5.To.Reg = out 418 // done (label) 419 p6 := s.Prog(obj.ANOP) 420 p2.To.SetTarget(p6) 421 422 case ssa.OpPPC64LoweredPubBarrier: 423 // LWSYNC 424 s.Prog(v.Op.Asm()) 425 426 case ssa.OpPPC64LoweredGetClosurePtr: 427 // Closure pointer is R11 (already) 428 ssagen.CheckLoweredGetClosurePtr(v) 429 430 case ssa.OpPPC64LoweredGetCallerSP: 431 // caller's SP is FixedFrameSize below the address of the first arg 432 p := s.Prog(ppc64.AMOVD) 433 p.From.Type = obj.TYPE_ADDR 434 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize 435 p.From.Name = obj.NAME_PARAM 436 p.To.Type = obj.TYPE_REG 437 p.To.Reg = v.Reg() 438 439 case ssa.OpPPC64LoweredGetCallerPC: 440 p := s.Prog(obj.AGETCALLERPC) 441 p.To.Type = obj.TYPE_REG 442 p.To.Reg = v.Reg() 443 444 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F: 445 // input is already rounded 446 447 case ssa.OpLoadReg: 448 loadOp := loadByType(v.Type) 449 p := s.Prog(loadOp) 450 ssagen.AddrAuto(&p.From, v.Args[0]) 451 p.To.Type = obj.TYPE_REG 452 p.To.Reg = v.Reg() 453 454 case ssa.OpStoreReg: 455 storeOp := storeByType(v.Type) 456 p := s.Prog(storeOp) 457 p.From.Type = obj.TYPE_REG 458 p.From.Reg = v.Args[0].Reg() 459 ssagen.AddrAuto(&p.To, v) 460 461 case ssa.OpArgIntReg, ssa.OpArgFloatReg: 462 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill 463 // The loop only runs once. 464 for _, a := range v.Block.Func.RegArgs { 465 // Pass the spill/unspill information along to the assembler, offset by size of 466 // the saved LR slot. 467 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize) 468 s.FuncInfo().AddSpill( 469 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)}) 470 } 471 v.Block.Func.RegArgs = nil 472 473 ssagen.CheckArgReg(v) 474 475 case ssa.OpPPC64DIVD: 476 // For now, 477 // 478 // cmp arg1, -1 479 // be ahead 480 // v = arg0 / arg1 481 // b over 482 // ahead: v = - arg0 483 // over: nop 484 r := v.Reg() 485 r0 := v.Args[0].Reg() 486 r1 := v.Args[1].Reg() 487 488 p := s.Prog(ppc64.ACMP) 489 p.From.Type = obj.TYPE_REG 490 p.From.Reg = r1 491 p.To.Type = obj.TYPE_CONST 492 p.To.Offset = -1 493 494 pbahead := s.Prog(ppc64.ABEQ) 495 pbahead.To.Type = obj.TYPE_BRANCH 496 497 p = s.Prog(v.Op.Asm()) 498 p.From.Type = obj.TYPE_REG 499 p.From.Reg = r1 500 p.Reg = r0 501 p.To.Type = obj.TYPE_REG 502 p.To.Reg = r 503 504 pbover := s.Prog(obj.AJMP) 505 pbover.To.Type = obj.TYPE_BRANCH 506 507 p = s.Prog(ppc64.ANEG) 508 p.To.Type = obj.TYPE_REG 509 p.To.Reg = r 510 p.From.Type = obj.TYPE_REG 511 p.From.Reg = r0 512 pbahead.To.SetTarget(p) 513 514 p = s.Prog(obj.ANOP) 515 pbover.To.SetTarget(p) 516 517 case ssa.OpPPC64DIVW: 518 // word-width version of above 519 r := v.Reg() 520 r0 := v.Args[0].Reg() 521 r1 := v.Args[1].Reg() 522 523 p := s.Prog(ppc64.ACMPW) 524 p.From.Type = obj.TYPE_REG 525 p.From.Reg = r1 526 p.To.Type = obj.TYPE_CONST 527 p.To.Offset = -1 528 529 pbahead := s.Prog(ppc64.ABEQ) 530 pbahead.To.Type = obj.TYPE_BRANCH 531 532 p = s.Prog(v.Op.Asm()) 533 p.From.Type = obj.TYPE_REG 534 p.From.Reg = r1 535 p.Reg = r0 536 p.To.Type = obj.TYPE_REG 537 p.To.Reg = r 538 539 pbover := s.Prog(obj.AJMP) 540 pbover.To.Type = obj.TYPE_BRANCH 541 542 p = s.Prog(ppc64.ANEG) 543 p.To.Type = obj.TYPE_REG 544 
p.To.Reg = r 545 p.From.Type = obj.TYPE_REG 546 p.From.Reg = r0 547 pbahead.To.SetTarget(p) 548 549 p = s.Prog(obj.ANOP) 550 pbover.To.SetTarget(p) 551 552 case ssa.OpPPC64CLRLSLWI: 553 r := v.Reg() 554 r1 := v.Args[0].Reg() 555 shifts := v.AuxInt 556 p := s.Prog(v.Op.Asm()) 557 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA 558 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} 559 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts)) 560 p.Reg = r1 561 p.To.Type = obj.TYPE_REG 562 p.To.Reg = r 563 564 case ssa.OpPPC64CLRLSLDI: 565 r := v.Reg() 566 r1 := v.Args[0].Reg() 567 shifts := v.AuxInt 568 p := s.Prog(v.Op.Asm()) 569 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh 570 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} 571 p.SetFrom3Const(ssa.GetPPC64Shiftsh(shifts)) 572 p.Reg = r1 573 p.To.Type = obj.TYPE_REG 574 p.To.Reg = r 575 576 // Mask has been set as sh 577 case ssa.OpPPC64RLDICL: 578 r := v.Reg() 579 r1 := v.Args[0].Reg() 580 shifts := v.AuxInt 581 p := s.Prog(v.Op.Asm()) 582 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} 583 p.SetFrom3Const(ssa.GetPPC64Shiftmb(shifts)) 584 p.Reg = r1 585 p.To.Type = obj.TYPE_REG 586 p.To.Reg = r 587 588 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS, 589 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU, 590 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW, 591 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW, 592 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU, 593 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN, 594 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV, 595 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW: 596 r := v.Reg() 597 r1 := v.Args[0].Reg() 598 r2 := v.Args[1].Reg() 599 p := s.Prog(v.Op.Asm()) 600 p.From.Type = obj.TYPE_REG 601 p.From.Reg = r2 602 p.Reg = r1 603 p.To.Type = obj.TYPE_REG 604 p.To.Reg = r 605 606 case ssa.OpPPC64ANDCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC: 607 r1 := v.Args[0].Reg() 608 r2 := v.Args[1].Reg() 609 p := s.Prog(v.Op.Asm()) 610 p.From.Type = obj.TYPE_REG 611 p.From.Reg = r2 612 p.Reg = r1 613 p.To.Type = obj.TYPE_REG 614 p.To.Reg = v.Reg0() 615 616 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst: 617 p := s.Prog(v.Op.Asm()) 618 p.From.Type = obj.TYPE_CONST 619 p.From.Offset = v.AuxInt 620 p.Reg = v.Args[0].Reg() 621 p.To.Type = obj.TYPE_REG 622 p.To.Reg = v.Reg() 623 624 // Auxint holds encoded rotate + mask 625 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI: 626 rot, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt) 627 p := s.Prog(v.Op.Asm()) 628 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} 629 p.Reg = v.Args[0].Reg() 630 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)} 631 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}}) 632 633 // Auxint holds mask 634 case ssa.OpPPC64RLWNM: 635 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt) 636 p := s.Prog(v.Op.Asm()) 637 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} 638 p.Reg = v.Args[0].Reg() 639 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()} 640 p.SetRestArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}}) 641 642 case ssa.OpPPC64MADDLD: 643 r := v.Reg() 644 r1 := 
v.Args[0].Reg() 645 r2 := v.Args[1].Reg() 646 r3 := v.Args[2].Reg() 647 // r = r1*r2 ± r3 648 p := s.Prog(v.Op.Asm()) 649 p.From.Type = obj.TYPE_REG 650 p.From.Reg = r1 651 p.Reg = r2 652 p.SetFrom3Reg(r3) 653 p.To.Type = obj.TYPE_REG 654 p.To.Reg = r 655 656 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS: 657 r := v.Reg() 658 r1 := v.Args[0].Reg() 659 r2 := v.Args[1].Reg() 660 r3 := v.Args[2].Reg() 661 // r = r1*r2 ± r3 662 p := s.Prog(v.Op.Asm()) 663 p.From.Type = obj.TYPE_REG 664 p.From.Reg = r1 665 p.Reg = r3 666 p.SetFrom3Reg(r2) 667 p.To.Type = obj.TYPE_REG 668 p.To.Reg = r 669 670 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, 671 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, 672 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, 673 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD: 674 r := v.Reg() 675 p := s.Prog(v.Op.Asm()) 676 p.To.Type = obj.TYPE_REG 677 p.To.Reg = r 678 p.From.Type = obj.TYPE_REG 679 p.From.Reg = v.Args[0].Reg() 680 681 case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, 682 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, 683 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst: 684 p := s.Prog(v.Op.Asm()) 685 p.Reg = v.Args[0].Reg() 686 p.From.Type = obj.TYPE_CONST 687 p.From.Offset = v.AuxInt 688 p.To.Type = obj.TYPE_REG 689 p.To.Reg = v.Reg() 690 691 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE: 692 r := v.Reg0() // CA is the first, implied argument. 693 r1 := v.Args[0].Reg() 694 r2 := v.Args[1].Reg() 695 p := s.Prog(v.Op.Asm()) 696 p.From.Type = obj.TYPE_REG 697 p.From.Reg = r2 698 p.Reg = r1 699 p.To.Type = obj.TYPE_REG 700 p.To.Reg = r 701 702 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero: 703 p := s.Prog(v.Op.Asm()) 704 p.From.Type = obj.TYPE_REG 705 p.From.Reg = ppc64.REG_R0 706 p.To.Type = obj.TYPE_REG 707 p.To.Reg = v.Reg() 708 709 case ssa.OpPPC64ADDCconst: 710 p := s.Prog(v.Op.Asm()) 711 p.Reg = v.Args[0].Reg() 712 p.From.Type = obj.TYPE_CONST 713 p.From.Offset = v.AuxInt 714 p.To.Type = obj.TYPE_REG 715 // Output is a pair, the second is the CA, which is implied. 716 p.To.Reg = v.Reg0() 717 718 case ssa.OpPPC64SUBCconst: 719 p := s.Prog(v.Op.Asm()) 720 p.SetFrom3Const(v.AuxInt) 721 p.From.Type = obj.TYPE_REG 722 p.From.Reg = v.Args[0].Reg() 723 p.To.Type = obj.TYPE_REG 724 p.To.Reg = v.Reg0() 725 726 case ssa.OpPPC64SUBFCconst: 727 p := s.Prog(v.Op.Asm()) 728 p.SetFrom3Const(v.AuxInt) 729 p.From.Type = obj.TYPE_REG 730 p.From.Reg = v.Args[0].Reg() 731 p.To.Type = obj.TYPE_REG 732 p.To.Reg = v.Reg() 733 734 case ssa.OpPPC64ANDCCconst: 735 p := s.Prog(v.Op.Asm()) 736 p.Reg = v.Args[0].Reg() 737 p.From.Type = obj.TYPE_CONST 738 p.From.Offset = v.AuxInt 739 p.To.Type = obj.TYPE_REG 740 // p.To.Reg = ppc64.REGTMP // discard result 741 p.To.Reg = v.Reg0() 742 743 case ssa.OpPPC64MOVDaddr: 744 switch v.Aux.(type) { 745 default: 746 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux) 747 case nil: 748 // If aux offset and aux int are both 0, and the same 749 // input and output regs are used, no instruction 750 // needs to be generated, since it would just be 751 // addi rx, rx, 0. 
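// For example (registers illustrative), MOVD $0(R5), R5 would assemble to addi r5, r5, 0 and is skipped, while MOVD $8(R5), R6 must still be emitted.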
752 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() { 753 p := s.Prog(ppc64.AMOVD) 754 p.From.Type = obj.TYPE_ADDR 755 p.From.Reg = v.Args[0].Reg() 756 p.From.Offset = v.AuxInt 757 p.To.Type = obj.TYPE_REG 758 p.To.Reg = v.Reg() 759 } 760 761 case *obj.LSym, ir.Node: 762 p := s.Prog(ppc64.AMOVD) 763 p.From.Type = obj.TYPE_ADDR 764 p.From.Reg = v.Args[0].Reg() 765 p.To.Type = obj.TYPE_REG 766 p.To.Reg = v.Reg() 767 ssagen.AddAux(&p.From, v) 768 769 } 770 771 case ssa.OpPPC64MOVDconst: 772 p := s.Prog(v.Op.Asm()) 773 p.From.Type = obj.TYPE_CONST 774 p.From.Offset = v.AuxInt 775 p.To.Type = obj.TYPE_REG 776 p.To.Reg = v.Reg() 777 778 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst: 779 p := s.Prog(v.Op.Asm()) 780 p.From.Type = obj.TYPE_FCONST 781 p.From.Val = math.Float64frombits(uint64(v.AuxInt)) 782 p.To.Type = obj.TYPE_REG 783 p.To.Reg = v.Reg() 784 785 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU: 786 p := s.Prog(v.Op.Asm()) 787 p.From.Type = obj.TYPE_REG 788 p.From.Reg = v.Args[0].Reg() 789 p.To.Type = obj.TYPE_REG 790 p.To.Reg = v.Args[1].Reg() 791 792 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst: 793 p := s.Prog(v.Op.Asm()) 794 p.From.Type = obj.TYPE_REG 795 p.From.Reg = v.Args[0].Reg() 796 p.To.Type = obj.TYPE_CONST 797 p.To.Offset = v.AuxInt 798 799 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg: 800 // Shift in register to required size 801 p := s.Prog(v.Op.Asm()) 802 p.From.Type = obj.TYPE_REG 803 p.From.Reg = v.Args[0].Reg() 804 p.To.Reg = v.Reg() 805 p.To.Type = obj.TYPE_REG 806 807 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload: 808 809 // MOVDload and MOVWload are DS form instructions that are restricted to 810 // offsets that are a multiple of 4. If the offset is not a multiple of 4, 811 // then the address of the symbol to be loaded is computed (base + offset) 812 // and used as the new base register and the offset field in the instruction 813 // can be set to zero. 814 815 // This same problem can happen with gostrings since the final offset is not 816 // known yet, but could be unaligned after the relocation is resolved. 817 // So gostrings are handled the same way. 818 819 // This allows the MOVDload and MOVWload to be generated in more cases and 820 // eliminates some offset and alignment checking in the rules file. 821 822 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()} 823 ssagen.AddAux(&fromAddr, v) 824 825 genAddr := false 826 827 switch fromAddr.Name { 828 case obj.NAME_EXTERN, obj.NAME_STATIC: 829 // Special case for a rule combines the bytes of gostring. 830 // The v alignment might seem OK, but we don't want to load it 831 // using an offset because relocation comes later. 832 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0 833 default: 834 genAddr = fromAddr.Offset%4 != 0 835 } 836 if genAddr { 837 // Load full address into the temp register. 838 p := s.Prog(ppc64.AMOVD) 839 p.From.Type = obj.TYPE_ADDR 840 p.From.Reg = v.Args[0].Reg() 841 ssagen.AddAux(&p.From, v) 842 // Load target using temp as base register 843 // and offset zero. Setting NAME_NONE 844 // prevents any extra offsets from being 845 // added. 
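// For example (registers illustrative), MOVD 6(R3), R4 has an offset that is not a multiple of 4 and cannot be encoded, so the sequence becomes MOVD $6(R3), R31 followed by MOVD 0(R31), R4, R31 being REGTMP.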
846 p.To.Type = obj.TYPE_REG 847 p.To.Reg = ppc64.REGTMP 848 fromAddr.Reg = ppc64.REGTMP 849 // Clear the offset field and other 850 // information that might be used 851 // by the assembler to add to the 852 // final offset value. 853 fromAddr.Offset = 0 854 fromAddr.Name = obj.NAME_NONE 855 fromAddr.Sym = nil 856 } 857 p := s.Prog(v.Op.Asm()) 858 p.From = fromAddr 859 p.To.Type = obj.TYPE_REG 860 p.To.Reg = v.Reg() 861 862 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload: 863 p := s.Prog(v.Op.Asm()) 864 p.From.Type = obj.TYPE_MEM 865 p.From.Reg = v.Args[0].Reg() 866 ssagen.AddAux(&p.From, v) 867 p.To.Type = obj.TYPE_REG 868 p.To.Reg = v.Reg() 869 870 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload: 871 p := s.Prog(v.Op.Asm()) 872 p.From.Type = obj.TYPE_MEM 873 p.From.Reg = v.Args[0].Reg() 874 p.To.Type = obj.TYPE_REG 875 p.To.Reg = v.Reg() 876 877 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore: 878 p := s.Prog(v.Op.Asm()) 879 p.To.Type = obj.TYPE_MEM 880 p.To.Reg = v.Args[0].Reg() 881 p.From.Type = obj.TYPE_REG 882 p.From.Reg = v.Args[1].Reg() 883 884 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx, 885 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx, 886 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx: 887 p := s.Prog(v.Op.Asm()) 888 p.From.Type = obj.TYPE_MEM 889 p.From.Reg = v.Args[0].Reg() 890 p.From.Index = v.Args[1].Reg() 891 p.To.Type = obj.TYPE_REG 892 p.To.Reg = v.Reg() 893 894 case ssa.OpPPC64DCBT: 895 p := s.Prog(v.Op.Asm()) 896 p.From.Type = obj.TYPE_MEM 897 p.From.Reg = v.Args[0].Reg() 898 p.To.Type = obj.TYPE_CONST 899 p.To.Offset = v.AuxInt 900 901 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero: 902 p := s.Prog(v.Op.Asm()) 903 p.From.Type = obj.TYPE_REG 904 p.From.Reg = ppc64.REGZERO 905 p.To.Type = obj.TYPE_MEM 906 p.To.Reg = v.Args[0].Reg() 907 ssagen.AddAux(&p.To, v) 908 909 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero: 910 911 // MOVDstore and MOVDstorezero become DS form instructions that are restricted 912 // to offset values that are a multiple of 4. If the offset field is not a 913 // multiple of 4, then the full address of the store target is computed (base + 914 // offset) and used as the new base register and the offset in the instruction 915 // is set to 0. 916 917 // This allows the MOVDstore and MOVDstorezero to be generated in more cases, 918 // and prevents checking of the offset value and alignment in the rules. 919 920 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()} 921 ssagen.AddAux(&toAddr, v) 922 923 if toAddr.Offset%4 != 0 { 924 p := s.Prog(ppc64.AMOVD) 925 p.From.Type = obj.TYPE_ADDR 926 p.From.Reg = v.Args[0].Reg() 927 ssagen.AddAux(&p.From, v) 928 p.To.Type = obj.TYPE_REG 929 p.To.Reg = ppc64.REGTMP 930 toAddr.Reg = ppc64.REGTMP 931 // Clear the offset field and other 932 // information that might be used 933 // by the assembler to add to the 934 // final offset value. 
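// This is the same DS-form workaround used for MOVDload above: the full address is computed into REGTMP and the store below then uses offset 0.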
935 toAddr.Offset = 0 936 toAddr.Name = obj.NAME_NONE 937 toAddr.Sym = nil 938 } 939 p := s.Prog(v.Op.Asm()) 940 p.To = toAddr 941 p.From.Type = obj.TYPE_REG 942 if v.Op == ssa.OpPPC64MOVDstorezero { 943 p.From.Reg = ppc64.REGZERO 944 } else { 945 p.From.Reg = v.Args[1].Reg() 946 } 947 948 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore: 949 p := s.Prog(v.Op.Asm()) 950 p.From.Type = obj.TYPE_REG 951 p.From.Reg = v.Args[1].Reg() 952 p.To.Type = obj.TYPE_MEM 953 p.To.Reg = v.Args[0].Reg() 954 ssagen.AddAux(&p.To, v) 955 956 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx, 957 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx, 958 ssa.OpPPC64MOVHBRstoreidx: 959 p := s.Prog(v.Op.Asm()) 960 p.From.Type = obj.TYPE_REG 961 p.From.Reg = v.Args[2].Reg() 962 p.To.Index = v.Args[1].Reg() 963 p.To.Type = obj.TYPE_MEM 964 p.To.Reg = v.Args[0].Reg() 965 966 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELB, ssa.OpPPC64ISELZ: 967 // ISEL AuxInt ? arg0 : arg1 968 // ISELB is a special case of ISEL where AuxInt ? $1 (arg0) : $0. 969 // ISELZ is a special case of ISEL where arg1 is implicitly $0. 970 // 971 // AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO. 972 // ISEL accepts a CR bit argument, not a condition as expressed by AuxInt. 973 // Convert the condition to a CR bit argument by the following conversion: 974 // 975 // AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO 976 // AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO 977 p := s.Prog(ppc64.AISEL) 978 p.To.Type = obj.TYPE_REG 979 p.To.Reg = v.Reg() 980 // For ISELB/ISELZ Use R0 for 0 operand to avoid load. 981 r := obj.Addr{Type: obj.TYPE_REG, Reg: ppc64.REG_R0} 982 if v.Op == ssa.OpPPC64ISEL { 983 r.Reg = v.Args[1].Reg() 984 } 985 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2 986 if v.AuxInt > 3 { 987 p.Reg = r.Reg 988 p.SetFrom3Reg(v.Args[0].Reg()) 989 } else { 990 p.Reg = v.Args[0].Reg() 991 p.SetFrom3(r) 992 } 993 p.From.Type = obj.TYPE_CONST 994 p.From.Offset = v.AuxInt & 3 995 996 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort: 997 // The LoweredQuad code generation 998 // generates STXV instructions on 999 // power9. The Short variation is used 1000 // if no loop is generated. 1001 1002 // sizes >= 64 generate a loop as follows: 1003 1004 // Set up loop counter in CTR, used by BC 1005 // XXLXOR clears VS32 1006 // XXLXOR VS32,VS32,VS32 1007 // MOVD len/64,REG_TMP 1008 // MOVD REG_TMP,CTR 1009 // loop: 1010 // STXV VS32,0(R20) 1011 // STXV VS32,16(R20) 1012 // STXV VS32,32(R20) 1013 // STXV VS32,48(R20) 1014 // ADD $64,R20 1015 // BC 16, 0, loop 1016 1017 // Bytes per iteration 1018 ctr := v.AuxInt / 64 1019 1020 // Remainder bytes 1021 rem := v.AuxInt % 64 1022 1023 // Only generate a loop if there is more 1024 // than 1 iteration. 
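// For example (sizes illustrative): AuxInt=200 gives ctr=3, so the loop clears 192 bytes and the remaining 8 are cleared with a single MOVD below; AuxInt=72 gives ctr=1, no loop is emitted, and rem becomes 72.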
1025 if ctr > 1 { 1026 // Set up VS32 (V0) to hold 0s 1027 p := s.Prog(ppc64.AXXLXOR) 1028 p.From.Type = obj.TYPE_REG 1029 p.From.Reg = ppc64.REG_VS32 1030 p.To.Type = obj.TYPE_REG 1031 p.To.Reg = ppc64.REG_VS32 1032 p.Reg = ppc64.REG_VS32 1033 1034 // Set up CTR loop counter 1035 p = s.Prog(ppc64.AMOVD) 1036 p.From.Type = obj.TYPE_CONST 1037 p.From.Offset = ctr 1038 p.To.Type = obj.TYPE_REG 1039 p.To.Reg = ppc64.REGTMP 1040 1041 p = s.Prog(ppc64.AMOVD) 1042 p.From.Type = obj.TYPE_REG 1043 p.From.Reg = ppc64.REGTMP 1044 p.To.Type = obj.TYPE_REG 1045 p.To.Reg = ppc64.REG_CTR 1046 1047 // Don't generate padding for 1048 // loops with few iterations. 1049 if ctr > 3 { 1050 p = s.Prog(obj.APCALIGN) 1051 p.From.Type = obj.TYPE_CONST 1052 p.From.Offset = 16 1053 } 1054 1055 // generate 4 STXVs to zero 64 bytes 1056 var top *obj.Prog 1057 1058 p = s.Prog(ppc64.ASTXV) 1059 p.From.Type = obj.TYPE_REG 1060 p.From.Reg = ppc64.REG_VS32 1061 p.To.Type = obj.TYPE_MEM 1062 p.To.Reg = v.Args[0].Reg() 1063 1064 // Save the top of loop 1065 if top == nil { 1066 top = p 1067 } 1068 p = s.Prog(ppc64.ASTXV) 1069 p.From.Type = obj.TYPE_REG 1070 p.From.Reg = ppc64.REG_VS32 1071 p.To.Type = obj.TYPE_MEM 1072 p.To.Reg = v.Args[0].Reg() 1073 p.To.Offset = 16 1074 1075 p = s.Prog(ppc64.ASTXV) 1076 p.From.Type = obj.TYPE_REG 1077 p.From.Reg = ppc64.REG_VS32 1078 p.To.Type = obj.TYPE_MEM 1079 p.To.Reg = v.Args[0].Reg() 1080 p.To.Offset = 32 1081 1082 p = s.Prog(ppc64.ASTXV) 1083 p.From.Type = obj.TYPE_REG 1084 p.From.Reg = ppc64.REG_VS32 1085 p.To.Type = obj.TYPE_MEM 1086 p.To.Reg = v.Args[0].Reg() 1087 p.To.Offset = 48 1088 1089 // Increment address for the 1090 // 64 bytes just zeroed. 1091 p = s.Prog(ppc64.AADD) 1092 p.Reg = v.Args[0].Reg() 1093 p.From.Type = obj.TYPE_CONST 1094 p.From.Offset = 64 1095 p.To.Type = obj.TYPE_REG 1096 p.To.Reg = v.Args[0].Reg() 1097 1098 // Branch back to top of loop 1099 // based on CTR 1100 // BC with BO_BCTR generates bdnz 1101 p = s.Prog(ppc64.ABC) 1102 p.From.Type = obj.TYPE_CONST 1103 p.From.Offset = ppc64.BO_BCTR 1104 p.Reg = ppc64.REG_CR0LT 1105 p.To.Type = obj.TYPE_BRANCH 1106 p.To.SetTarget(top) 1107 } 1108 // When ctr == 1 the loop was not generated but 1109 // there are at least 64 bytes to clear, so add 1110 // that to the remainder to generate the code 1111 // to clear those doublewords 1112 if ctr == 1 { 1113 rem += 64 1114 } 1115 1116 // Clear the remainder starting at offset zero 1117 offset := int64(0) 1118 1119 if rem >= 16 && ctr <= 1 { 1120 // If the XXLXOR hasn't already been 1121 // generated, do it here to initialize 1122 // VS32 (V0) to 0. 1123 p := s.Prog(ppc64.AXXLXOR) 1124 p.From.Type = obj.TYPE_REG 1125 p.From.Reg = ppc64.REG_VS32 1126 p.To.Type = obj.TYPE_REG 1127 p.To.Reg = ppc64.REG_VS32 1128 p.Reg = ppc64.REG_VS32 1129 } 1130 // Generate STXV for 32 or 64 1131 // bytes. 
1132 for rem >= 32 { 1133 p := s.Prog(ppc64.ASTXV) 1134 p.From.Type = obj.TYPE_REG 1135 p.From.Reg = ppc64.REG_VS32 1136 p.To.Type = obj.TYPE_MEM 1137 p.To.Reg = v.Args[0].Reg() 1138 p.To.Offset = offset 1139 1140 p = s.Prog(ppc64.ASTXV) 1141 p.From.Type = obj.TYPE_REG 1142 p.From.Reg = ppc64.REG_VS32 1143 p.To.Type = obj.TYPE_MEM 1144 p.To.Reg = v.Args[0].Reg() 1145 p.To.Offset = offset + 16 1146 offset += 32 1147 rem -= 32 1148 } 1149 // Generate 16 bytes 1150 if rem >= 16 { 1151 p := s.Prog(ppc64.ASTXV) 1152 p.From.Type = obj.TYPE_REG 1153 p.From.Reg = ppc64.REG_VS32 1154 p.To.Type = obj.TYPE_MEM 1155 p.To.Reg = v.Args[0].Reg() 1156 p.To.Offset = offset 1157 offset += 16 1158 rem -= 16 1159 } 1160 1161 // first clear as many doublewords as possible 1162 // then clear remaining sizes as available 1163 for rem > 0 { 1164 op, size := ppc64.AMOVB, int64(1) 1165 switch { 1166 case rem >= 8: 1167 op, size = ppc64.AMOVD, 8 1168 case rem >= 4: 1169 op, size = ppc64.AMOVW, 4 1170 case rem >= 2: 1171 op, size = ppc64.AMOVH, 2 1172 } 1173 p := s.Prog(op) 1174 p.From.Type = obj.TYPE_REG 1175 p.From.Reg = ppc64.REG_R0 1176 p.To.Type = obj.TYPE_MEM 1177 p.To.Reg = v.Args[0].Reg() 1178 p.To.Offset = offset 1179 rem -= size 1180 offset += size 1181 } 1182 1183 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort: 1184 1185 // Unaligned data doesn't hurt performance 1186 // for these instructions on power8. 1187 1188 // For sizes >= 64 generate a loop as follows: 1189 1190 // Set up loop counter in CTR, used by BC 1191 // XXLXOR VS32,VS32,VS32 1192 // MOVD len/32,REG_TMP 1193 // MOVD REG_TMP,CTR 1194 // MOVD $16,REG_TMP 1195 // loop: 1196 // STXVD2X VS32,(R0)(R20) 1197 // STXVD2X VS32,(R31)(R20) 1198 // ADD $32,R20 1199 // BC 16, 0, loop 1200 // 1201 // any remainder is done as described below 1202 1203 // for sizes < 64 bytes, first clear as many doublewords as possible, 1204 // then handle the remainder 1205 // MOVD R0,(R20) 1206 // MOVD R0,8(R20) 1207 // .... etc. 1208 // 1209 // the remainder bytes are cleared using one or more 1210 // of the following instructions with the appropriate 1211 // offsets depending which instructions are needed 1212 // 1213 // MOVW R0,n1(R20) 4 bytes 1214 // MOVH R0,n2(R20) 2 bytes 1215 // MOVB R0,n3(R20) 1 byte 1216 // 1217 // 7 bytes: MOVW, MOVH, MOVB 1218 // 6 bytes: MOVW, MOVH 1219 // 5 bytes: MOVW, MOVB 1220 // 3 bytes: MOVH, MOVB 1221 1222 // each loop iteration does 32 bytes 1223 ctr := v.AuxInt / 32 1224 1225 // remainder bytes 1226 rem := v.AuxInt % 32 1227 1228 // only generate a loop if there is more 1229 // than 1 iteration. 1230 if ctr > 1 { 1231 // Set up VS32 (V0) to hold 0s 1232 p := s.Prog(ppc64.AXXLXOR) 1233 p.From.Type = obj.TYPE_REG 1234 p.From.Reg = ppc64.REG_VS32 1235 p.To.Type = obj.TYPE_REG 1236 p.To.Reg = ppc64.REG_VS32 1237 p.Reg = ppc64.REG_VS32 1238 1239 // Set up CTR loop counter 1240 p = s.Prog(ppc64.AMOVD) 1241 p.From.Type = obj.TYPE_CONST 1242 p.From.Offset = ctr 1243 p.To.Type = obj.TYPE_REG 1244 p.To.Reg = ppc64.REGTMP 1245 1246 p = s.Prog(ppc64.AMOVD) 1247 p.From.Type = obj.TYPE_REG 1248 p.From.Reg = ppc64.REGTMP 1249 p.To.Type = obj.TYPE_REG 1250 p.To.Reg = ppc64.REG_CTR 1251 1252 // Set up R31 to hold index value 16 1253 p = s.Prog(ppc64.AMOVD) 1254 p.From.Type = obj.TYPE_CONST 1255 p.From.Offset = 16 1256 p.To.Type = obj.TYPE_REG 1257 p.To.Reg = ppc64.REGTMP 1258 1259 // Don't add padding for alignment 1260 // with few loop iterations. 
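// PCALIGN $16 asks the assembler to pad so that the loop top starts on a 16 byte boundary; with only 2 or 3 iterations the padding likely costs more than the alignment saves.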
1261 if ctr > 3 { 1262 p = s.Prog(obj.APCALIGN) 1263 p.From.Type = obj.TYPE_CONST 1264 p.From.Offset = 16 1265 } 1266 1267 // generate 2 STXVD2Xs to store 16 bytes 1268 // when this is a loop then the top must be saved 1269 var top *obj.Prog 1270 // This is the top of loop 1271 1272 p = s.Prog(ppc64.ASTXVD2X) 1273 p.From.Type = obj.TYPE_REG 1274 p.From.Reg = ppc64.REG_VS32 1275 p.To.Type = obj.TYPE_MEM 1276 p.To.Reg = v.Args[0].Reg() 1277 p.To.Index = ppc64.REGZERO 1278 // Save the top of loop 1279 if top == nil { 1280 top = p 1281 } 1282 p = s.Prog(ppc64.ASTXVD2X) 1283 p.From.Type = obj.TYPE_REG 1284 p.From.Reg = ppc64.REG_VS32 1285 p.To.Type = obj.TYPE_MEM 1286 p.To.Reg = v.Args[0].Reg() 1287 p.To.Index = ppc64.REGTMP 1288 1289 // Increment address for the 1290 // 4 doublewords just zeroed. 1291 p = s.Prog(ppc64.AADD) 1292 p.Reg = v.Args[0].Reg() 1293 p.From.Type = obj.TYPE_CONST 1294 p.From.Offset = 32 1295 p.To.Type = obj.TYPE_REG 1296 p.To.Reg = v.Args[0].Reg() 1297 1298 // Branch back to top of loop 1299 // based on CTR 1300 // BC with BO_BCTR generates bdnz 1301 p = s.Prog(ppc64.ABC) 1302 p.From.Type = obj.TYPE_CONST 1303 p.From.Offset = ppc64.BO_BCTR 1304 p.Reg = ppc64.REG_CR0LT 1305 p.To.Type = obj.TYPE_BRANCH 1306 p.To.SetTarget(top) 1307 } 1308 1309 // when ctr == 1 the loop was not generated but 1310 // there are at least 32 bytes to clear, so add 1311 // that to the remainder to generate the code 1312 // to clear those doublewords 1313 if ctr == 1 { 1314 rem += 32 1315 } 1316 1317 // clear the remainder starting at offset zero 1318 offset := int64(0) 1319 1320 // first clear as many doublewords as possible 1321 // then clear remaining sizes as available 1322 for rem > 0 { 1323 op, size := ppc64.AMOVB, int64(1) 1324 switch { 1325 case rem >= 8: 1326 op, size = ppc64.AMOVD, 8 1327 case rem >= 4: 1328 op, size = ppc64.AMOVW, 4 1329 case rem >= 2: 1330 op, size = ppc64.AMOVH, 2 1331 } 1332 p := s.Prog(op) 1333 p.From.Type = obj.TYPE_REG 1334 p.From.Reg = ppc64.REG_R0 1335 p.To.Type = obj.TYPE_MEM 1336 p.To.Reg = v.Args[0].Reg() 1337 p.To.Offset = offset 1338 rem -= size 1339 offset += size 1340 } 1341 1342 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort: 1343 1344 bytesPerLoop := int64(32) 1345 // This will be used when moving more 1346 // than 8 bytes. Moves start with 1347 // as many 8 byte moves as possible, then 1348 // 4, 2, or 1 byte(s) as remaining. This will 1349 // work and be efficient for power8 or later. 1350 // If there are 64 or more bytes, then a 1351 // loop is generated to move 32 bytes and 1352 // update the src and dst addresses on each 1353 // iteration. When < 64 bytes, the appropriate 1354 // number of moves are generated based on the 1355 // size. 1356 // When moving >= 64 bytes a loop is used 1357 // MOVD len/32,REG_TMP 1358 // MOVD REG_TMP,CTR 1359 // MOVD $16,REG_TMP 1360 // top: 1361 // LXVD2X (R0)(R21),VS32 1362 // LXVD2X (R31)(R21),VS33 1363 // ADD $32,R21 1364 // STXVD2X VS32,(R0)(R20) 1365 // STXVD2X VS33,(R31)(R20) 1366 // ADD $32,R20 1367 // BC 16,0,top 1368 // Bytes not moved by this loop are moved 1369 // with a combination of the following instructions, 1370 // starting with the largest sizes and generating as 1371 // many as needed, using the appropriate offset value. 
// MOVD n(R21),R31 1373 // MOVD R31,n(R20) 1374 // MOVW n1(R21),R31 1375 // MOVW R31,n1(R20) 1376 // MOVH n2(R21),R31 1377 // MOVH R31,n2(R20) 1378 // MOVB n3(R21),R31 1379 // MOVB R31,n3(R20) 1380 1381 // Each loop iteration moves 32 bytes 1382 ctr := v.AuxInt / bytesPerLoop 1383 1384 // Remainder after the loop 1385 rem := v.AuxInt % bytesPerLoop 1386 1387 dstReg := v.Args[0].Reg() 1388 srcReg := v.Args[1].Reg() 1389 1390 // The set of registers used here must match the clobbered reg list 1391 // in PPC64Ops.go. 1392 offset := int64(0) 1393 1394 // top of the loop 1395 var top *obj.Prog 1396 // Only generate looping code when loop counter is > 1 for >= 64 bytes 1397 if ctr > 1 { 1398 // Set up the CTR 1399 p := s.Prog(ppc64.AMOVD) 1400 p.From.Type = obj.TYPE_CONST 1401 p.From.Offset = ctr 1402 p.To.Type = obj.TYPE_REG 1403 p.To.Reg = ppc64.REGTMP 1404 1405 p = s.Prog(ppc64.AMOVD) 1406 p.From.Type = obj.TYPE_REG 1407 p.From.Reg = ppc64.REGTMP 1408 p.To.Type = obj.TYPE_REG 1409 p.To.Reg = ppc64.REG_CTR 1410 1411 // Use REGTMP as index reg 1412 p = s.Prog(ppc64.AMOVD) 1413 p.From.Type = obj.TYPE_CONST 1414 p.From.Offset = 16 1415 p.To.Type = obj.TYPE_REG 1416 p.To.Reg = ppc64.REGTMP 1417 1418 // Don't add padding for 1419 // alignment with small iteration 1420 // counts. 1421 if ctr > 3 { 1422 p = s.Prog(obj.APCALIGN) 1423 p.From.Type = obj.TYPE_CONST 1424 p.From.Offset = 16 1425 } 1426 1427 // Generate 16 byte loads and stores. 1428 // Use temp register for index (16) 1429 // on the second one. 1430 1431 p = s.Prog(ppc64.ALXVD2X) 1432 p.From.Type = obj.TYPE_MEM 1433 p.From.Reg = srcReg 1434 p.From.Index = ppc64.REGZERO 1435 p.To.Type = obj.TYPE_REG 1436 p.To.Reg = ppc64.REG_VS32 1437 if top == nil { 1438 top = p 1439 } 1440 p = s.Prog(ppc64.ALXVD2X) 1441 p.From.Type = obj.TYPE_MEM 1442 p.From.Reg = srcReg 1443 p.From.Index = ppc64.REGTMP 1444 p.To.Type = obj.TYPE_REG 1445 p.To.Reg = ppc64.REG_VS33 1446 1447 // increment the src reg for next iteration 1448 p = s.Prog(ppc64.AADD) 1449 p.Reg = srcReg 1450 p.From.Type = obj.TYPE_CONST 1451 p.From.Offset = bytesPerLoop 1452 p.To.Type = obj.TYPE_REG 1453 p.To.Reg = srcReg 1454 1455 // generate 16 byte stores 1456 p = s.Prog(ppc64.ASTXVD2X) 1457 p.From.Type = obj.TYPE_REG 1458 p.From.Reg = ppc64.REG_VS32 1459 p.To.Type = obj.TYPE_MEM 1460 p.To.Reg = dstReg 1461 p.To.Index = ppc64.REGZERO 1462 1463 p = s.Prog(ppc64.ASTXVD2X) 1464 p.From.Type = obj.TYPE_REG 1465 p.From.Reg = ppc64.REG_VS33 1466 p.To.Type = obj.TYPE_MEM 1467 p.To.Reg = dstReg 1468 p.To.Index = ppc64.REGTMP 1469 1470 // increment the dst reg for next iteration 1471 p = s.Prog(ppc64.AADD) 1472 p.Reg = dstReg 1473 p.From.Type = obj.TYPE_CONST 1474 p.From.Offset = bytesPerLoop 1475 p.To.Type = obj.TYPE_REG 1476 p.To.Reg = dstReg 1477 1478 // BC with BO_BCTR generates bdnz to branch on nonzero CTR 1479 // to loop top. 1480 p = s.Prog(ppc64.ABC) 1481 p.From.Type = obj.TYPE_CONST 1482 p.From.Offset = ppc64.BO_BCTR 1483 p.Reg = ppc64.REG_CR0LT 1484 p.To.Type = obj.TYPE_BRANCH 1485 p.To.SetTarget(top) 1486 1487 // srcReg and dstReg were incremented in the loop, so 1488 // later instructions start with offset 0. 1489 offset = int64(0) 1490 } 1491 1492 // No loop was generated for one iteration, so 1493 // add 32 bytes to the remainder to move those bytes. 1494 if ctr == 1 { 1495 rem += bytesPerLoop 1496 } 1497 1498 if rem >= 16 { 1499 // Generate 16 byte loads and stores. 1500 // Use temp register for index (value 16) 1501 // on the second one.
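// At this point rem is less than 64 (at most 31, or 63 when the single-iteration case above was folded in), so at most two 16 byte copies are needed; hence the nested if below rather than a loop.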
1502 p := s.Prog(ppc64.ALXVD2X) 1503 p.From.Type = obj.TYPE_MEM 1504 p.From.Reg = srcReg 1505 p.From.Index = ppc64.REGZERO 1506 p.To.Type = obj.TYPE_REG 1507 p.To.Reg = ppc64.REG_VS32 1508 1509 p = s.Prog(ppc64.ASTXVD2X) 1510 p.From.Type = obj.TYPE_REG 1511 p.From.Reg = ppc64.REG_VS32 1512 p.To.Type = obj.TYPE_MEM 1513 p.To.Reg = dstReg 1514 p.To.Index = ppc64.REGZERO 1515 1516 offset = 16 1517 rem -= 16 1518 1519 if rem >= 16 { 1520 // Use REGTMP as index reg 1521 p := s.Prog(ppc64.AMOVD) 1522 p.From.Type = obj.TYPE_CONST 1523 p.From.Offset = 16 1524 p.To.Type = obj.TYPE_REG 1525 p.To.Reg = ppc64.REGTMP 1526 1527 p = s.Prog(ppc64.ALXVD2X) 1528 p.From.Type = obj.TYPE_MEM 1529 p.From.Reg = srcReg 1530 p.From.Index = ppc64.REGTMP 1531 p.To.Type = obj.TYPE_REG 1532 p.To.Reg = ppc64.REG_VS32 1533 1534 p = s.Prog(ppc64.ASTXVD2X) 1535 p.From.Type = obj.TYPE_REG 1536 p.From.Reg = ppc64.REG_VS32 1537 p.To.Type = obj.TYPE_MEM 1538 p.To.Reg = dstReg 1539 p.To.Index = ppc64.REGTMP 1540 1541 offset = 32 1542 rem -= 16 1543 } 1544 } 1545 1546 // Generate all the remaining load and store pairs, starting with 1547 // as many 8 byte moves as possible, then 4, 2, 1. 1548 for rem > 0 { 1549 op, size := ppc64.AMOVB, int64(1) 1550 switch { 1551 case rem >= 8: 1552 op, size = ppc64.AMOVD, 8 1553 case rem >= 4: 1554 op, size = ppc64.AMOVWZ, 4 1555 case rem >= 2: 1556 op, size = ppc64.AMOVH, 2 1557 } 1558 // Load 1559 p := s.Prog(op) 1560 p.To.Type = obj.TYPE_REG 1561 p.To.Reg = ppc64.REGTMP 1562 p.From.Type = obj.TYPE_MEM 1563 p.From.Reg = srcReg 1564 p.From.Offset = offset 1565 1566 // Store 1567 p = s.Prog(op) 1568 p.From.Type = obj.TYPE_REG 1569 p.From.Reg = ppc64.REGTMP 1570 p.To.Type = obj.TYPE_MEM 1571 p.To.Reg = dstReg 1572 p.To.Offset = offset 1573 rem -= size 1574 offset += size 1575 } 1576 1577 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort: 1578 bytesPerLoop := int64(64) 1579 // This is used when moving more 1580 // than 8 bytes on power9. Moves start with 1581 // as many 8 byte moves as possible, then 1582 // 4, 2, or 1 byte(s) as remaining. This will 1583 // work and be efficient for power8 or later. 1584 // If there are 64 or more bytes, then a 1585 // loop is generated to move 64 bytes and 1586 // update the src and dst addresses on each 1587 // iteration. When < 64 bytes, the appropriate 1588 // number of moves are generated based on the 1589 // size. 1590 // When moving >= 64 bytes a loop is used 1591 // MOVD len/64,REG_TMP 1592 // MOVD REG_TMP,CTR 1593 // top: 1594 // LXV 0(R21),VS32; LXV 16(R21),VS33 1595 // STXV VS32,0(R20); STXV VS33,16(R20) 1596 // LXV 32(R21),VS32; LXV 48(R21),VS33 1597 // STXV VS32,32(R20); STXV VS33,48(R20) 1598 // ADD $64,R21 1599 // ADD $64,R20 1600 // BC 16,0,top 1601 // Bytes not moved by this loop are moved 1602 // with a combination of the following instructions, 1603 // starting with the largest sizes and generating as 1604 // many as needed, using the appropriate offset value. 1605
// MOVD n(R21),R31 1606 // MOVD R31,n(R20) 1607 // MOVW n1(R21),R31 1608 // MOVW R31,n1(R20) 1609 // MOVH n2(R21),R31 1610 // MOVH R31,n2(R20) 1611 // MOVB n3(R21),R31 1612 // MOVB R31,n3(R20) 1613 1614 // Each loop iteration moves 64 bytes 1615 ctr := v.AuxInt / bytesPerLoop 1616 1617 // Remainder after the loop 1618 rem := v.AuxInt % bytesPerLoop 1619 1620 dstReg := v.Args[0].Reg() 1621 srcReg := v.Args[1].Reg() 1622 1623 offset := int64(0) 1624 1625 // top of the loop 1626 var top *obj.Prog 1627 1628 // Only generate looping code when loop counter is > 1 for >= 64 bytes 1629 if ctr > 1 { 1630 // Set up the CTR 1631 p := s.Prog(ppc64.AMOVD) 1632 p.From.Type = obj.TYPE_CONST 1633 p.From.Offset = ctr 1634 p.To.Type = obj.TYPE_REG 1635 p.To.Reg = ppc64.REGTMP 1636 1637 p = s.Prog(ppc64.AMOVD) 1638 p.From.Type = obj.TYPE_REG 1639 p.From.Reg = ppc64.REGTMP 1640 p.To.Type = obj.TYPE_REG 1641 p.To.Reg = ppc64.REG_CTR 1642 1643 p = s.Prog(obj.APCALIGN) 1644 p.From.Type = obj.TYPE_CONST 1645 p.From.Offset = 16 1646 1647 // Generate 16 byte loads and stores. 1648 p = s.Prog(ppc64.ALXV) 1649 p.From.Type = obj.TYPE_MEM 1650 p.From.Reg = srcReg 1651 p.From.Offset = offset 1652 p.To.Type = obj.TYPE_REG 1653 p.To.Reg = ppc64.REG_VS32 1654 if top == nil { 1655 top = p 1656 } 1657 p = s.Prog(ppc64.ALXV) 1658 p.From.Type = obj.TYPE_MEM 1659 p.From.Reg = srcReg 1660 p.From.Offset = offset + 16 1661 p.To.Type = obj.TYPE_REG 1662 p.To.Reg = ppc64.REG_VS33 1663 1664 // generate 16 byte stores 1665 p = s.Prog(ppc64.ASTXV) 1666 p.From.Type = obj.TYPE_REG 1667 p.From.Reg = ppc64.REG_VS32 1668 p.To.Type = obj.TYPE_MEM 1669 p.To.Reg = dstReg 1670 p.To.Offset = offset 1671 1672 p = s.Prog(ppc64.ASTXV) 1673 p.From.Type = obj.TYPE_REG 1674 p.From.Reg = ppc64.REG_VS33 1675 p.To.Type = obj.TYPE_MEM 1676 p.To.Reg = dstReg 1677 p.To.Offset = offset + 16 1678 1679 // Generate 16 byte loads and stores. 1680 p = s.Prog(ppc64.ALXV) 1681 p.From.Type = obj.TYPE_MEM 1682 p.From.Reg = srcReg 1683 p.From.Offset = offset + 32 1684 p.To.Type = obj.TYPE_REG 1685 p.To.Reg = ppc64.REG_VS32 1686 1687 p = s.Prog(ppc64.ALXV) 1688 p.From.Type = obj.TYPE_MEM 1689 p.From.Reg = srcReg 1690 p.From.Offset = offset + 48 1691 p.To.Type = obj.TYPE_REG 1692 p.To.Reg = ppc64.REG_VS33 1693 1694 // generate 16 byte stores 1695 p = s.Prog(ppc64.ASTXV) 1696 p.From.Type = obj.TYPE_REG 1697 p.From.Reg = ppc64.REG_VS32 1698 p.To.Type = obj.TYPE_MEM 1699 p.To.Reg = dstReg 1700 p.To.Offset = offset + 32 1701 1702 p = s.Prog(ppc64.ASTXV) 1703 p.From.Type = obj.TYPE_REG 1704 p.From.Reg = ppc64.REG_VS33 1705 p.To.Type = obj.TYPE_MEM 1706 p.To.Reg = dstReg 1707 p.To.Offset = offset + 48 1708 1709 // increment the src reg for next iteration 1710 p = s.Prog(ppc64.AADD) 1711 p.Reg = srcReg 1712 p.From.Type = obj.TYPE_CONST 1713 p.From.Offset = bytesPerLoop 1714 p.To.Type = obj.TYPE_REG 1715 p.To.Reg = srcReg 1716 1717 // increment the dst reg for next iteration 1718 p = s.Prog(ppc64.AADD) 1719 p.Reg = dstReg 1720 p.From.Type = obj.TYPE_CONST 1721 p.From.Offset = bytesPerLoop 1722 p.To.Type = obj.TYPE_REG 1723 p.To.Reg = dstReg 1724 1725 // BC with BO_BCTR generates bdnz to branch on nonzero CTR 1726 // to loop top. 1727 p = s.Prog(ppc64.ABC) 1728 p.From.Type = obj.TYPE_CONST 1729 p.From.Offset = ppc64.BO_BCTR 1730 p.Reg = ppc64.REG_CR0LT 1731 p.To.Type = obj.TYPE_BRANCH 1732 p.To.SetTarget(top) 1733 1734 // srcReg and dstReg were incremented in the loop, so 1735 // later instructions start with offset 0. 1736
offset = int64(0) 1737 } 1738 1739 // No loop was generated for one iteration, so 1740 // add 64 bytes to the remainder to move those bytes. 1741 if ctr == 1 { 1742 rem += bytesPerLoop 1743 } 1744 if rem >= 32 { 1745 p := s.Prog(ppc64.ALXV) 1746 p.From.Type = obj.TYPE_MEM 1747 p.From.Reg = srcReg 1748 p.To.Type = obj.TYPE_REG 1749 p.To.Reg = ppc64.REG_VS32 1750 1751 p = s.Prog(ppc64.ALXV) 1752 p.From.Type = obj.TYPE_MEM 1753 p.From.Reg = srcReg 1754 p.From.Offset = 16 1755 p.To.Type = obj.TYPE_REG 1756 p.To.Reg = ppc64.REG_VS33 1757 1758 p = s.Prog(ppc64.ASTXV) 1759 p.From.Type = obj.TYPE_REG 1760 p.From.Reg = ppc64.REG_VS32 1761 p.To.Type = obj.TYPE_MEM 1762 p.To.Reg = dstReg 1763 1764 p = s.Prog(ppc64.ASTXV) 1765 p.From.Type = obj.TYPE_REG 1766 p.From.Reg = ppc64.REG_VS33 1767 p.To.Type = obj.TYPE_MEM 1768 p.To.Reg = dstReg 1769 p.To.Offset = 16 1770 1771 offset = 32 1772 rem -= 32 1773 } 1774 1775 if rem >= 16 { 1776 // Generate 16 byte loads and stores. 1777 p := s.Prog(ppc64.ALXV) 1778 p.From.Type = obj.TYPE_MEM 1779 p.From.Reg = srcReg 1780 p.From.Offset = offset 1781 p.To.Type = obj.TYPE_REG 1782 p.To.Reg = ppc64.REG_VS32 1783 1784 p = s.Prog(ppc64.ASTXV) 1785 p.From.Type = obj.TYPE_REG 1786 p.From.Reg = ppc64.REG_VS32 1787 p.To.Type = obj.TYPE_MEM 1788 p.To.Reg = dstReg 1789 p.To.Offset = offset 1790 1791 offset += 16 1792 rem -= 16 1793 1794 if rem >= 16 { 1795 p := s.Prog(ppc64.ALXV) 1796 p.From.Type = obj.TYPE_MEM 1797 p.From.Reg = srcReg 1798 p.From.Offset = offset 1799 p.To.Type = obj.TYPE_REG 1800 p.To.Reg = ppc64.REG_VS32 1801 1802 p = s.Prog(ppc64.ASTXV) 1803 p.From.Type = obj.TYPE_REG 1804 p.From.Reg = ppc64.REG_VS32 1805 p.To.Type = obj.TYPE_MEM 1806 p.To.Reg = dstReg 1807 p.To.Offset = offset 1808 1809 offset += 16 1810 rem -= 16 1811 } 1812 } 1813 // Generate all the remaining load and store pairs, starting with 1814 // as many 8 byte moves as possible, then 4, 2, 1. 1815 for rem > 0 { 1816 op, size := ppc64.AMOVB, int64(1) 1817 switch { 1818 case rem >= 8: 1819 op, size = ppc64.AMOVD, 8 1820 case rem >= 4: 1821 op, size = ppc64.AMOVWZ, 4 1822 case rem >= 2: 1823 op, size = ppc64.AMOVH, 2 1824 } 1825 // Load 1826 p := s.Prog(op) 1827 p.To.Type = obj.TYPE_REG 1828 p.To.Reg = ppc64.REGTMP 1829 p.From.Type = obj.TYPE_MEM 1830 p.From.Reg = srcReg 1831 p.From.Offset = offset 1832 1833 // Store 1834 p = s.Prog(op) 1835 p.From.Type = obj.TYPE_REG 1836 p.From.Reg = ppc64.REGTMP 1837 p.To.Type = obj.TYPE_MEM 1838 p.To.Reg = dstReg 1839 p.To.Offset = offset 1840 rem -= size 1841 offset += size 1842 } 1843 1844 case ssa.OpPPC64CALLstatic: 1845 s.Call(v) 1846 1847 case ssa.OpPPC64CALLtail: 1848 s.TailCall(v) 1849 1850 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter: 1851 p := s.Prog(ppc64.AMOVD) 1852 p.From.Type = obj.TYPE_REG 1853 p.From.Reg = v.Args[0].Reg() 1854 p.To.Type = obj.TYPE_REG 1855 p.To.Reg = ppc64.REG_LR 1856 1857 if v.Args[0].Reg() != ppc64.REG_R12 { 1858 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg) 1859 } 1860 1861 pp := s.Call(v) 1862 1863 // Convert the call into a blrl with hint this is not a subroutine return. 1864 // The full bclrl opcode must be specified when passing a hint. 1865 pp.As = ppc64.ABCL 1866 pp.From.Type = obj.TYPE_CONST 1867 pp.From.Offset = ppc64.BO_ALWAYS 1868 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored. 1869
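// The constant 1 passed via SetFrom3Const below is the branch hint (BH) field of the bclrl, encoding the "not a subroutine return" hint mentioned above.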
1869 pp.To.Reg = ppc64.REG_LR 1870 pp.SetFrom3Const(1) 1871 1872 if base.Ctxt.Flag_shared { 1873 // When compiling Go into PIC, the function we just 1874 // called via pointer might have been implemented in 1875 // a separate module and so overwritten the TOC 1876 // pointer in R2; reload it. 1877 q := s.Prog(ppc64.AMOVD) 1878 q.From.Type = obj.TYPE_MEM 1879 q.From.Offset = 24 1880 q.From.Reg = ppc64.REGSP 1881 q.To.Type = obj.TYPE_REG 1882 q.To.Reg = ppc64.REG_R2 1883 } 1884 1885 case ssa.OpPPC64LoweredWB: 1886 p := s.Prog(obj.ACALL) 1887 p.To.Type = obj.TYPE_MEM 1888 p.To.Name = obj.NAME_EXTERN 1889 p.To.Sym = v.Aux.(*obj.LSym) 1890 1891 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC: 1892 p := s.Prog(obj.ACALL) 1893 p.To.Type = obj.TYPE_MEM 1894 p.To.Name = obj.NAME_EXTERN 1895 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt] 1896 s.UseArgs(16) // space used in callee args area by assembly stubs 1897 1898 case ssa.OpPPC64LoweredNilCheck: 1899 if buildcfg.GOOS == "aix" { 1900 // CMP Rarg0, R0 1901 // BNE 2(PC) 1902 // STW R0, 0(R0) 1903 // NOP (so the BNE has somewhere to land) 1904 1905 // CMP Rarg0, R0 1906 p := s.Prog(ppc64.ACMP) 1907 p.From.Type = obj.TYPE_REG 1908 p.From.Reg = v.Args[0].Reg() 1909 p.To.Type = obj.TYPE_REG 1910 p.To.Reg = ppc64.REG_R0 1911 1912 // BNE 2(PC) 1913 p2 := s.Prog(ppc64.ABNE) 1914 p2.To.Type = obj.TYPE_BRANCH 1915 1916 // STW R0, 0(R0) 1917 // Write at 0 is forbidden and will trigger a SIGSEGV 1918 p = s.Prog(ppc64.AMOVW) 1919 p.From.Type = obj.TYPE_REG 1920 p.From.Reg = ppc64.REG_R0 1921 p.To.Type = obj.TYPE_MEM 1922 p.To.Reg = ppc64.REG_R0 1923 1924 // NOP (so the BNE has somewhere to land) 1925 nop := s.Prog(obj.ANOP) 1926 p2.To.SetTarget(nop) 1927 1928 } else { 1929 // Issue a load which will fault if arg is nil. 1930 p := s.Prog(ppc64.AMOVBZ) 1931 p.From.Type = obj.TYPE_MEM 1932 p.From.Reg = v.Args[0].Reg() 1933 ssagen.AddAux(&p.From, v) 1934 p.To.Type = obj.TYPE_REG 1935 p.To.Reg = ppc64.REGTMP 1936 } 1937 if logopt.Enabled() { 1938 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name) 1939 } 1940 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers 1941 base.WarnfAt(v.Pos, "generated nil check") 1942 } 1943 1944 // These should be resolved by rules and not make it here. 1945 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan, 1946 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual, 1947 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual: 1948 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString()) 1949 case ssa.OpPPC64InvertFlags: 1950 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) 1951 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT: 1952 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) 1953 case ssa.OpClobber, ssa.OpClobberReg: 1954 // TODO: implement for clobberdead experiment. Nop is ok for now. 
1955 default: 1956 v.Fatalf("genValue not implemented: %s", v.LongString()) 1957 } 1958 } 1959 1960 var blockJump = [...]struct { 1961 asm, invasm obj.As 1962 asmeq, invasmun bool 1963 }{ 1964 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false}, 1965 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false}, 1966 1967 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false}, 1968 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false}, 1969 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false}, 1970 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false}, 1971 1972 // TODO: need to work FP comparisons into block jumps 1973 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false}, 1974 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN 1975 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN 1976 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false}, 1977 } 1978 1979 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) { 1980 switch b.Kind { 1981 case ssa.BlockDefer: 1982 // defer returns in R3: 1983 // 0 if we should continue executing 1984 // 1 if we should jump to deferreturn call 1985 p := s.Prog(ppc64.ACMP) 1986 p.From.Type = obj.TYPE_REG 1987 p.From.Reg = ppc64.REG_R3 1988 p.To.Type = obj.TYPE_REG 1989 p.To.Reg = ppc64.REG_R0 1990 1991 p = s.Prog(ppc64.ABNE) 1992 p.To.Type = obj.TYPE_BRANCH 1993 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()}) 1994 if b.Succs[0].Block() != next { 1995 p := s.Prog(obj.AJMP) 1996 p.To.Type = obj.TYPE_BRANCH 1997 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()}) 1998 } 1999 2000 case ssa.BlockPlain: 2001 if b.Succs[0].Block() != next { 2002 p := s.Prog(obj.AJMP) 2003 p.To.Type = obj.TYPE_BRANCH 2004 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()}) 2005 } 2006 case ssa.BlockExit, ssa.BlockRetJmp: 2007 case ssa.BlockRet: 2008 s.Prog(obj.ARET) 2009 2010 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE, 2011 ssa.BlockPPC64LT, ssa.BlockPPC64GE, 2012 ssa.BlockPPC64LE, ssa.BlockPPC64GT, 2013 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE, 2014 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT: 2015 jmp := blockJump[b.Kind] 2016 switch next { 2017 case b.Succs[0].Block(): 2018 s.Br(jmp.invasm, b.Succs[1].Block()) 2019 if jmp.invasmun { 2020 // TODO: The second branch is probably predict-not-taken since it is for FP unordered 2021 s.Br(ppc64.ABVS, b.Succs[1].Block()) 2022 } 2023 case b.Succs[1].Block(): 2024 s.Br(jmp.asm, b.Succs[0].Block()) 2025 if jmp.asmeq { 2026 s.Br(ppc64.ABEQ, b.Succs[0].Block()) 2027 } 2028 default: 2029 if b.Likely != ssa.BranchUnlikely { 2030 s.Br(jmp.asm, b.Succs[0].Block()) 2031 if jmp.asmeq { 2032 s.Br(ppc64.ABEQ, b.Succs[0].Block()) 2033 } 2034 s.Br(obj.AJMP, b.Succs[1].Block()) 2035 } else { 2036 s.Br(jmp.invasm, b.Succs[1].Block()) 2037 if jmp.invasmun { 2038 // TODO: The second branch is probably predict-not-taken since it is for FP unordered 2039 s.Br(ppc64.ABVS, b.Succs[1].Block()) 2040 } 2041 s.Br(obj.AJMP, b.Succs[0].Block()) 2042 } 2043 } 2044 default: 2045 b.Fatalf("branch not implemented: %s", b.LongString()) 2046 } 2047 } 2048 2049 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { 2050 p := s.Prog(loadByType(t)) 2051 p.From.Type = obj.TYPE_MEM 2052 p.From.Name = obj.NAME_AUTO 2053 p.From.Sym = n.Linksym() 2054 p.From.Offset = n.FrameOffset() + off 2055 p.To.Type = obj.TYPE_REG 2056 p.To.Reg = reg 2057 return p 2058 } 2059 2060 
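// spillArgReg appends (after p) a store that spills argument register reg of type t to n's parameter slot at offset off.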
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { 2061 p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off) 2062 p.To.Name = obj.NAME_PARAM 2063 p.To.Sym = n.Linksym() 2064 p.Pos = p.Pos.WithNotStmt() 2065 return p 2066 }