github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/ppc64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ppc64

import (
	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ir"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/logopt"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/objw"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssa"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssagen"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
	"github.com/bir3/gocompiler/src/cmd/internal/obj"
	"github.com/bir3/gocompiler/src/cmd/internal/obj/ppc64"
	"github.com/bir3/gocompiler/src/internal/buildcfg"
	"math"
	"strings"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	// flive := b.FlagsLiveAtEnd
	// if b.Control != nil && b.Control.Type.IsFlags() {
	// 	flive = true
	// }
	// for i := len(b.Values) - 1; i >= 0; i-- {
	// 	v := b.Values[i]
	// 	if flive && v.Op == ssa.OpPPC64MOVDconst {
	// 		// The "mark" is any non-nil Aux value.
	// 		v.Aux = v
	// 	}
	// 	if v.Type.IsFlags() {
	// 		flive = false
	// 	}
	// 	for _, a := range v.Args {
	// 		if a.Type.IsFlags() {
	// 			flive = true
	// 		}
	// 	}
	// }
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			if t.IsSigned() {
				return ppc64.AMOVB
			} else {
				return ppc64.AMOVBZ
			}
		case 2:
			if t.IsSigned() {
				return ppc64.AMOVH
			} else {
				return ppc64.AMOVHZ
			}
		case 4:
			if t.IsSigned() {
				return ppc64.AMOVW
			} else {
				return ppc64.AMOVWZ
			}
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad load type")
}

// storeByType returns the store instruction of the given type.
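// Unlike loadByType, the choice of store instruction does not depend on the
// signedness of t: stores never sign- or zero-extend, so for example a 4-byte
// value is stored with MOVW regardless of whether it was loaded with MOVW or
// MOVWZ.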
82 func storeByType(t *types.Type) obj.As { 83 if t.IsFloat() { 84 switch t.Size() { 85 case 4: 86 return ppc64.AFMOVS 87 case 8: 88 return ppc64.AFMOVD 89 } 90 } else { 91 switch t.Size() { 92 case 1: 93 return ppc64.AMOVB 94 case 2: 95 return ppc64.AMOVH 96 case 4: 97 return ppc64.AMOVW 98 case 8: 99 return ppc64.AMOVD 100 } 101 } 102 panic("bad store type") 103 } 104 105 func ssaGenValue(s *ssagen.State, v *ssa.Value) { 106 switch v.Op { 107 case ssa.OpCopy: 108 t := v.Type 109 if t.IsMemory() { 110 return 111 } 112 x := v.Args[0].Reg() 113 y := v.Reg() 114 if x != y { 115 rt := obj.TYPE_REG 116 op := ppc64.AMOVD 117 118 if t.IsFloat() { 119 op = ppc64.AFMOVD 120 } 121 p := s.Prog(op) 122 p.From.Type = rt 123 p.From.Reg = x 124 p.To.Type = rt 125 p.To.Reg = y 126 } 127 128 case ssa.OpPPC64LoweredAtomicAnd8, 129 ssa.OpPPC64LoweredAtomicAnd32, 130 ssa.OpPPC64LoweredAtomicOr8, 131 ssa.OpPPC64LoweredAtomicOr32: 132 // LWSYNC 133 // LBAR/LWAR (Rarg0), Rtmp 134 // AND/OR Rarg1, Rtmp 135 // STBCCC/STWCCC Rtmp, (Rarg0) 136 // BNE -3(PC) 137 ld := ppc64.ALBAR 138 st := ppc64.ASTBCCC 139 if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 { 140 ld = ppc64.ALWAR 141 st = ppc64.ASTWCCC 142 } 143 r0 := v.Args[0].Reg() 144 r1 := v.Args[1].Reg() 145 // LWSYNC - Assuming shared data not write-through-required nor 146 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 147 plwsync := s.Prog(ppc64.ALWSYNC) 148 plwsync.To.Type = obj.TYPE_NONE 149 // LBAR or LWAR 150 p := s.Prog(ld) 151 p.From.Type = obj.TYPE_MEM 152 p.From.Reg = r0 153 p.To.Type = obj.TYPE_REG 154 p.To.Reg = ppc64.REGTMP 155 // AND/OR reg1,out 156 p1 := s.Prog(v.Op.Asm()) 157 p1.From.Type = obj.TYPE_REG 158 p1.From.Reg = r1 159 p1.To.Type = obj.TYPE_REG 160 p1.To.Reg = ppc64.REGTMP 161 // STBCCC or STWCCC 162 p2 := s.Prog(st) 163 p2.From.Type = obj.TYPE_REG 164 p2.From.Reg = ppc64.REGTMP 165 p2.To.Type = obj.TYPE_MEM 166 p2.To.Reg = r0 167 p2.RegTo2 = ppc64.REGTMP 168 // BNE retry 169 p3 := s.Prog(ppc64.ABNE) 170 p3.To.Type = obj.TYPE_BRANCH 171 p3.To.SetTarget(p) 172 173 case ssa.OpPPC64LoweredAtomicAdd32, 174 ssa.OpPPC64LoweredAtomicAdd64: 175 // LWSYNC 176 // LDAR/LWAR (Rarg0), Rout 177 // ADD Rarg1, Rout 178 // STDCCC/STWCCC Rout, (Rarg0) 179 // BNE -3(PC) 180 // MOVW Rout,Rout (if Add32) 181 ld := ppc64.ALDAR 182 st := ppc64.ASTDCCC 183 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 { 184 ld = ppc64.ALWAR 185 st = ppc64.ASTWCCC 186 } 187 r0 := v.Args[0].Reg() 188 r1 := v.Args[1].Reg() 189 out := v.Reg0() 190 // LWSYNC - Assuming shared data not write-through-required nor 191 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 
192 plwsync := s.Prog(ppc64.ALWSYNC) 193 plwsync.To.Type = obj.TYPE_NONE 194 // LDAR or LWAR 195 p := s.Prog(ld) 196 p.From.Type = obj.TYPE_MEM 197 p.From.Reg = r0 198 p.To.Type = obj.TYPE_REG 199 p.To.Reg = out 200 // ADD reg1,out 201 p1 := s.Prog(ppc64.AADD) 202 p1.From.Type = obj.TYPE_REG 203 p1.From.Reg = r1 204 p1.To.Reg = out 205 p1.To.Type = obj.TYPE_REG 206 // STDCCC or STWCCC 207 p3 := s.Prog(st) 208 p3.From.Type = obj.TYPE_REG 209 p3.From.Reg = out 210 p3.To.Type = obj.TYPE_MEM 211 p3.To.Reg = r0 212 // BNE retry 213 p4 := s.Prog(ppc64.ABNE) 214 p4.To.Type = obj.TYPE_BRANCH 215 p4.To.SetTarget(p) 216 217 // Ensure a 32 bit result 218 if v.Op == ssa.OpPPC64LoweredAtomicAdd32 { 219 p5 := s.Prog(ppc64.AMOVWZ) 220 p5.To.Type = obj.TYPE_REG 221 p5.To.Reg = out 222 p5.From.Type = obj.TYPE_REG 223 p5.From.Reg = out 224 } 225 226 case ssa.OpPPC64LoweredAtomicExchange32, 227 ssa.OpPPC64LoweredAtomicExchange64: 228 // LWSYNC 229 // LDAR/LWAR (Rarg0), Rout 230 // STDCCC/STWCCC Rout, (Rarg0) 231 // BNE -2(PC) 232 // ISYNC 233 ld := ppc64.ALDAR 234 st := ppc64.ASTDCCC 235 if v.Op == ssa.OpPPC64LoweredAtomicExchange32 { 236 ld = ppc64.ALWAR 237 st = ppc64.ASTWCCC 238 } 239 r0 := v.Args[0].Reg() 240 r1 := v.Args[1].Reg() 241 out := v.Reg0() 242 // LWSYNC - Assuming shared data not write-through-required nor 243 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 244 plwsync := s.Prog(ppc64.ALWSYNC) 245 plwsync.To.Type = obj.TYPE_NONE 246 // LDAR or LWAR 247 p := s.Prog(ld) 248 p.From.Type = obj.TYPE_MEM 249 p.From.Reg = r0 250 p.To.Type = obj.TYPE_REG 251 p.To.Reg = out 252 // STDCCC or STWCCC 253 p1 := s.Prog(st) 254 p1.From.Type = obj.TYPE_REG 255 p1.From.Reg = r1 256 p1.To.Type = obj.TYPE_MEM 257 p1.To.Reg = r0 258 // BNE retry 259 p2 := s.Prog(ppc64.ABNE) 260 p2.To.Type = obj.TYPE_BRANCH 261 p2.To.SetTarget(p) 262 // ISYNC 263 pisync := s.Prog(ppc64.AISYNC) 264 pisync.To.Type = obj.TYPE_NONE 265 266 case ssa.OpPPC64LoweredAtomicLoad8, 267 ssa.OpPPC64LoweredAtomicLoad32, 268 ssa.OpPPC64LoweredAtomicLoad64, 269 ssa.OpPPC64LoweredAtomicLoadPtr: 270 // SYNC 271 // MOVB/MOVD/MOVW (Rarg0), Rout 272 // CMP Rout,Rout 273 // BNE 1(PC) 274 // ISYNC 275 ld := ppc64.AMOVD 276 cmp := ppc64.ACMP 277 switch v.Op { 278 case ssa.OpPPC64LoweredAtomicLoad8: 279 ld = ppc64.AMOVBZ 280 case ssa.OpPPC64LoweredAtomicLoad32: 281 ld = ppc64.AMOVWZ 282 cmp = ppc64.ACMPW 283 } 284 arg0 := v.Args[0].Reg() 285 out := v.Reg0() 286 // SYNC when AuxInt == 1; otherwise, load-acquire 287 if v.AuxInt == 1 { 288 psync := s.Prog(ppc64.ASYNC) 289 psync.To.Type = obj.TYPE_NONE 290 } 291 // Load 292 p := s.Prog(ld) 293 p.From.Type = obj.TYPE_MEM 294 p.From.Reg = arg0 295 p.To.Type = obj.TYPE_REG 296 p.To.Reg = out 297 // CMP 298 p1 := s.Prog(cmp) 299 p1.From.Type = obj.TYPE_REG 300 p1.From.Reg = out 301 p1.To.Type = obj.TYPE_REG 302 p1.To.Reg = out 303 // BNE 304 p2 := s.Prog(ppc64.ABNE) 305 p2.To.Type = obj.TYPE_BRANCH 306 // ISYNC 307 pisync := s.Prog(ppc64.AISYNC) 308 pisync.To.Type = obj.TYPE_NONE 309 p2.To.SetTarget(pisync) 310 311 case ssa.OpPPC64LoweredAtomicStore8, 312 ssa.OpPPC64LoweredAtomicStore32, 313 ssa.OpPPC64LoweredAtomicStore64: 314 // SYNC or LWSYNC 315 // MOVB/MOVW/MOVD arg1,(arg0) 316 st := ppc64.AMOVD 317 switch v.Op { 318 case ssa.OpPPC64LoweredAtomicStore8: 319 st = ppc64.AMOVB 320 case ssa.OpPPC64LoweredAtomicStore32: 321 st = ppc64.AMOVW 322 } 323 arg0 := v.Args[0].Reg() 324 arg1 := v.Args[1].Reg() 325 // If AuxInt == 0, LWSYNC (Store-Release), else SYNC 326 // SYNC 327 syncOp := ppc64.ASYNC 328 
if v.AuxInt == 0 { 329 syncOp = ppc64.ALWSYNC 330 } 331 psync := s.Prog(syncOp) 332 psync.To.Type = obj.TYPE_NONE 333 // Store 334 p := s.Prog(st) 335 p.To.Type = obj.TYPE_MEM 336 p.To.Reg = arg0 337 p.From.Type = obj.TYPE_REG 338 p.From.Reg = arg1 339 340 case ssa.OpPPC64LoweredAtomicCas64, 341 ssa.OpPPC64LoweredAtomicCas32: 342 // MOVD $0, Rout 343 // LWSYNC 344 // loop: 345 // LDAR (Rarg0), MutexHint, Rtmp 346 // CMP Rarg1, Rtmp 347 // BNE end 348 // STDCCC Rarg2, (Rarg0) 349 // BNE loop 350 // MOVD $1, Rout 351 // end: 352 // LWSYNC // Only for sequential consistency; not required in CasRel. 353 ld := ppc64.ALDAR 354 st := ppc64.ASTDCCC 355 cmp := ppc64.ACMP 356 if v.Op == ssa.OpPPC64LoweredAtomicCas32 { 357 ld = ppc64.ALWAR 358 st = ppc64.ASTWCCC 359 cmp = ppc64.ACMPW 360 } 361 r0 := v.Args[0].Reg() 362 r1 := v.Args[1].Reg() 363 r2 := v.Args[2].Reg() 364 out := v.Reg0() 365 // Initialize return value to false 366 p := s.Prog(ppc64.AMOVD) 367 p.From.Type = obj.TYPE_CONST 368 p.From.Offset = 0 369 p.To.Type = obj.TYPE_REG 370 p.To.Reg = out 371 // LWSYNC - Assuming shared data not write-through-required nor 372 // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. 373 plwsync1 := s.Prog(ppc64.ALWSYNC) 374 plwsync1.To.Type = obj.TYPE_NONE 375 // LDAR or LWAR 376 p0 := s.Prog(ld) 377 p0.From.Type = obj.TYPE_MEM 378 p0.From.Reg = r0 379 p0.To.Type = obj.TYPE_REG 380 p0.To.Reg = ppc64.REGTMP 381 // If it is a Compare-and-Swap-Release operation, set the EH field with 382 // the release hint. 383 if v.AuxInt == 0 { 384 p0.AddRestSourceConst(0) 385 } 386 // CMP reg1,reg2 387 p1 := s.Prog(cmp) 388 p1.From.Type = obj.TYPE_REG 389 p1.From.Reg = r1 390 p1.To.Reg = ppc64.REGTMP 391 p1.To.Type = obj.TYPE_REG 392 // BNE done with return value = false 393 p2 := s.Prog(ppc64.ABNE) 394 p2.To.Type = obj.TYPE_BRANCH 395 // STDCCC or STWCCC 396 p3 := s.Prog(st) 397 p3.From.Type = obj.TYPE_REG 398 p3.From.Reg = r2 399 p3.To.Type = obj.TYPE_MEM 400 p3.To.Reg = r0 401 // BNE retry 402 p4 := s.Prog(ppc64.ABNE) 403 p4.To.Type = obj.TYPE_BRANCH 404 p4.To.SetTarget(p0) 405 // return value true 406 p5 := s.Prog(ppc64.AMOVD) 407 p5.From.Type = obj.TYPE_CONST 408 p5.From.Offset = 1 409 p5.To.Type = obj.TYPE_REG 410 p5.To.Reg = out 411 // LWSYNC - Assuming shared data not write-through-required nor 412 // caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b. 413 // If the operation is a CAS-Release, then synchronization is not necessary. 
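		// AuxInt != 0 selects the sequentially consistent CAS and emits the
		// trailing LWSYNC below, which also serves as the target of the
		// failure branch; AuxInt == 0 is the CasRel form, which needs only a
		// NOP as the branch target and no trailing barrier.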
414 if v.AuxInt != 0 { 415 plwsync2 := s.Prog(ppc64.ALWSYNC) 416 plwsync2.To.Type = obj.TYPE_NONE 417 p2.To.SetTarget(plwsync2) 418 } else { 419 // done (label) 420 p6 := s.Prog(obj.ANOP) 421 p2.To.SetTarget(p6) 422 } 423 424 case ssa.OpPPC64LoweredPubBarrier: 425 // LWSYNC 426 s.Prog(v.Op.Asm()) 427 428 case ssa.OpPPC64LoweredGetClosurePtr: 429 // Closure pointer is R11 (already) 430 ssagen.CheckLoweredGetClosurePtr(v) 431 432 case ssa.OpPPC64LoweredGetCallerSP: 433 // caller's SP is FixedFrameSize below the address of the first arg 434 p := s.Prog(ppc64.AMOVD) 435 p.From.Type = obj.TYPE_ADDR 436 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize 437 p.From.Name = obj.NAME_PARAM 438 p.To.Type = obj.TYPE_REG 439 p.To.Reg = v.Reg() 440 441 case ssa.OpPPC64LoweredGetCallerPC: 442 p := s.Prog(obj.AGETCALLERPC) 443 p.To.Type = obj.TYPE_REG 444 p.To.Reg = v.Reg() 445 446 case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F: 447 // input is already rounded 448 449 case ssa.OpLoadReg: 450 loadOp := loadByType(v.Type) 451 p := s.Prog(loadOp) 452 ssagen.AddrAuto(&p.From, v.Args[0]) 453 p.To.Type = obj.TYPE_REG 454 p.To.Reg = v.Reg() 455 456 case ssa.OpStoreReg: 457 storeOp := storeByType(v.Type) 458 p := s.Prog(storeOp) 459 p.From.Type = obj.TYPE_REG 460 p.From.Reg = v.Args[0].Reg() 461 ssagen.AddrAuto(&p.To, v) 462 463 case ssa.OpArgIntReg, ssa.OpArgFloatReg: 464 // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill 465 // The loop only runs once. 466 for _, a := range v.Block.Func.RegArgs { 467 // Pass the spill/unspill information along to the assembler, offset by size of 468 // the saved LR slot. 469 addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize) 470 s.FuncInfo().AddSpill( 471 obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)}) 472 } 473 v.Block.Func.RegArgs = nil 474 475 ssagen.CheckArgReg(v) 476 477 case ssa.OpPPC64DIVD: 478 // For now, 479 // 480 // cmp arg1, -1 481 // be ahead 482 // v = arg0 / arg1 483 // b over 484 // ahead: v = - arg0 485 // over: nop 486 r := v.Reg() 487 r0 := v.Args[0].Reg() 488 r1 := v.Args[1].Reg() 489 490 p := s.Prog(ppc64.ACMP) 491 p.From.Type = obj.TYPE_REG 492 p.From.Reg = r1 493 p.To.Type = obj.TYPE_CONST 494 p.To.Offset = -1 495 496 pbahead := s.Prog(ppc64.ABEQ) 497 pbahead.To.Type = obj.TYPE_BRANCH 498 499 p = s.Prog(v.Op.Asm()) 500 p.From.Type = obj.TYPE_REG 501 p.From.Reg = r1 502 p.Reg = r0 503 p.To.Type = obj.TYPE_REG 504 p.To.Reg = r 505 506 pbover := s.Prog(obj.AJMP) 507 pbover.To.Type = obj.TYPE_BRANCH 508 509 p = s.Prog(ppc64.ANEG) 510 p.To.Type = obj.TYPE_REG 511 p.To.Reg = r 512 p.From.Type = obj.TYPE_REG 513 p.From.Reg = r0 514 pbahead.To.SetTarget(p) 515 516 p = s.Prog(obj.ANOP) 517 pbover.To.SetTarget(p) 518 519 case ssa.OpPPC64DIVW: 520 // word-width version of above 521 r := v.Reg() 522 r0 := v.Args[0].Reg() 523 r1 := v.Args[1].Reg() 524 525 p := s.Prog(ppc64.ACMPW) 526 p.From.Type = obj.TYPE_REG 527 p.From.Reg = r1 528 p.To.Type = obj.TYPE_CONST 529 p.To.Offset = -1 530 531 pbahead := s.Prog(ppc64.ABEQ) 532 pbahead.To.Type = obj.TYPE_BRANCH 533 534 p = s.Prog(v.Op.Asm()) 535 p.From.Type = obj.TYPE_REG 536 p.From.Reg = r1 537 p.Reg = r0 538 p.To.Type = obj.TYPE_REG 539 p.To.Reg = r 540 541 pbover := s.Prog(obj.AJMP) 542 pbover.To.Type = obj.TYPE_BRANCH 543 544 p = s.Prog(ppc64.ANEG) 545 p.To.Type = obj.TYPE_REG 546 p.To.Reg = r 547 p.From.Type = obj.TYPE_REG 548 p.From.Reg = r0 549 pbahead.To.SetTarget(p) 550 551 p = s.Prog(obj.ANOP) 552 
pbover.To.SetTarget(p) 553 554 case ssa.OpPPC64CLRLSLWI: 555 r := v.Reg() 556 r1 := v.Args[0].Reg() 557 shifts := v.AuxInt 558 p := s.Prog(v.Op.Asm()) 559 // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA 560 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} 561 p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts)) 562 p.Reg = r1 563 p.To.Type = obj.TYPE_REG 564 p.To.Reg = r 565 566 case ssa.OpPPC64CLRLSLDI: 567 r := v.Reg() 568 r1 := v.Args[0].Reg() 569 shifts := v.AuxInt 570 p := s.Prog(v.Op.Asm()) 571 // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh 572 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} 573 p.AddRestSourceConst(ssa.GetPPC64Shiftsh(shifts)) 574 p.Reg = r1 575 p.To.Type = obj.TYPE_REG 576 p.To.Reg = r 577 578 case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS, 579 ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU, 580 ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW, 581 ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW, 582 ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU, 583 ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN, 584 ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV, 585 ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW: 586 r := v.Reg() 587 r1 := v.Args[0].Reg() 588 r2 := v.Args[1].Reg() 589 p := s.Prog(v.Op.Asm()) 590 p.From.Type = obj.TYPE_REG 591 p.From.Reg = r2 592 p.Reg = r1 593 p.To.Type = obj.TYPE_REG 594 p.To.Reg = r 595 596 case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC, 597 ssa.OpPPC64ANDNCC: 598 r1 := v.Args[0].Reg() 599 r2 := v.Args[1].Reg() 600 p := s.Prog(v.Op.Asm()) 601 p.From.Type = obj.TYPE_REG 602 p.From.Reg = r2 603 p.Reg = r1 604 p.To.Type = obj.TYPE_REG 605 p.To.Reg = v.Reg0() 606 607 case ssa.OpPPC64NEGCC, ssa.OpPPC64CNTLZDCC: 608 p := s.Prog(v.Op.Asm()) 609 p.To.Type = obj.TYPE_REG 610 p.To.Reg = v.Reg0() 611 p.From.Type = obj.TYPE_REG 612 p.From.Reg = v.Args[0].Reg() 613 614 case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst: 615 p := s.Prog(v.Op.Asm()) 616 p.From.Type = obj.TYPE_CONST 617 p.From.Offset = v.AuxInt 618 p.Reg = v.Args[0].Reg() 619 p.To.Type = obj.TYPE_REG 620 p.To.Reg = v.Reg() 621 622 // Auxint holds encoded rotate + mask 623 case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI: 624 sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt) 625 p := s.Prog(v.Op.Asm()) 626 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} 627 p.Reg = v.Args[0].Reg() 628 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(sh)} 629 p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}}) 630 // Auxint holds mask 631 632 case ssa.OpPPC64RLDICL, ssa.OpPPC64RLDICR: 633 sh, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt) 634 p := s.Prog(v.Op.Asm()) 635 p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: sh} 636 switch v.Op { 637 case ssa.OpPPC64RLDICL: 638 p.AddRestSourceConst(mb) 639 case ssa.OpPPC64RLDICR: 640 p.AddRestSourceConst(me) 641 } 642 p.Reg = v.Args[0].Reg() 643 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} 644 645 case ssa.OpPPC64RLWNM: 646 _, mb, me, _ := ssa.DecodePPC64RotateMask(v.AuxInt) 647 p := s.Prog(v.Op.Asm()) 648 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} 649 p.Reg = v.Args[0].Reg() 
650 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()} 651 p.AddRestSourceArgs([]obj.Addr{{Type: obj.TYPE_CONST, Offset: mb}, {Type: obj.TYPE_CONST, Offset: me}}) 652 653 case ssa.OpPPC64MADDLD: 654 r := v.Reg() 655 r1 := v.Args[0].Reg() 656 r2 := v.Args[1].Reg() 657 r3 := v.Args[2].Reg() 658 // r = r1*r2 ± r3 659 p := s.Prog(v.Op.Asm()) 660 p.From.Type = obj.TYPE_REG 661 p.From.Reg = r1 662 p.Reg = r2 663 p.AddRestSourceReg(r3) 664 p.To.Type = obj.TYPE_REG 665 p.To.Reg = r 666 667 case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS: 668 r := v.Reg() 669 r1 := v.Args[0].Reg() 670 r2 := v.Args[1].Reg() 671 r3 := v.Args[2].Reg() 672 // r = r1*r2 ± r3 673 p := s.Prog(v.Op.Asm()) 674 p.From.Type = obj.TYPE_REG 675 p.From.Reg = r1 676 p.Reg = r3 677 p.AddRestSourceReg(r2) 678 p.To.Type = obj.TYPE_REG 679 p.To.Reg = r 680 681 case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, 682 ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, 683 ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, 684 ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD, ssa.OpPPC64BRH, ssa.OpPPC64BRW, ssa.OpPPC64BRD: 685 r := v.Reg() 686 p := s.Prog(v.Op.Asm()) 687 p.To.Type = obj.TYPE_REG 688 p.To.Reg = r 689 p.From.Type = obj.TYPE_REG 690 p.From.Reg = v.Args[0].Reg() 691 692 case ssa.OpPPC64ADDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, 693 ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, 694 ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst: 695 p := s.Prog(v.Op.Asm()) 696 p.Reg = v.Args[0].Reg() 697 p.From.Type = obj.TYPE_CONST 698 p.From.Offset = v.AuxInt 699 p.To.Type = obj.TYPE_REG 700 p.To.Reg = v.Reg() 701 702 case ssa.OpPPC64ADDC, ssa.OpPPC64ADDE, ssa.OpPPC64SUBC, ssa.OpPPC64SUBE: 703 r := v.Reg0() // CA is the first, implied argument. 704 r1 := v.Args[0].Reg() 705 r2 := v.Args[1].Reg() 706 p := s.Prog(v.Op.Asm()) 707 p.From.Type = obj.TYPE_REG 708 p.From.Reg = r2 709 p.Reg = r1 710 p.To.Type = obj.TYPE_REG 711 p.To.Reg = r 712 713 case ssa.OpPPC64ADDZEzero, ssa.OpPPC64SUBZEzero: 714 p := s.Prog(v.Op.Asm()) 715 p.From.Type = obj.TYPE_REG 716 p.From.Reg = ppc64.REG_R0 717 p.To.Type = obj.TYPE_REG 718 p.To.Reg = v.Reg() 719 720 case ssa.OpPPC64ADDCconst: 721 p := s.Prog(v.Op.Asm()) 722 p.Reg = v.Args[0].Reg() 723 p.From.Type = obj.TYPE_CONST 724 p.From.Offset = v.AuxInt 725 p.To.Type = obj.TYPE_REG 726 // Output is a pair, the second is the CA, which is implied. 
727 p.To.Reg = v.Reg0() 728 729 case ssa.OpPPC64SUBCconst: 730 p := s.Prog(v.Op.Asm()) 731 p.AddRestSourceConst(v.AuxInt) 732 p.From.Type = obj.TYPE_REG 733 p.From.Reg = v.Args[0].Reg() 734 p.To.Type = obj.TYPE_REG 735 p.To.Reg = v.Reg0() 736 737 case ssa.OpPPC64SUBFCconst: 738 p := s.Prog(v.Op.Asm()) 739 p.AddRestSourceConst(v.AuxInt) 740 p.From.Type = obj.TYPE_REG 741 p.From.Reg = v.Args[0].Reg() 742 p.To.Type = obj.TYPE_REG 743 p.To.Reg = v.Reg() 744 745 case ssa.OpPPC64ADDCCconst, ssa.OpPPC64ANDCCconst: 746 p := s.Prog(v.Op.Asm()) 747 p.Reg = v.Args[0].Reg() 748 p.From.Type = obj.TYPE_CONST 749 p.From.Offset = v.AuxInt 750 p.To.Type = obj.TYPE_REG 751 p.To.Reg = v.Reg0() 752 753 case ssa.OpPPC64MOVDaddr: 754 switch v.Aux.(type) { 755 default: 756 v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux) 757 case nil: 758 // If aux offset and aux int are both 0, and the same 759 // input and output regs are used, no instruction 760 // needs to be generated, since it would just be 761 // addi rx, rx, 0. 762 if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() { 763 p := s.Prog(ppc64.AMOVD) 764 p.From.Type = obj.TYPE_ADDR 765 p.From.Reg = v.Args[0].Reg() 766 p.From.Offset = v.AuxInt 767 p.To.Type = obj.TYPE_REG 768 p.To.Reg = v.Reg() 769 } 770 771 case *obj.LSym, ir.Node: 772 p := s.Prog(ppc64.AMOVD) 773 p.From.Type = obj.TYPE_ADDR 774 p.From.Reg = v.Args[0].Reg() 775 p.To.Type = obj.TYPE_REG 776 p.To.Reg = v.Reg() 777 ssagen.AddAux(&p.From, v) 778 779 } 780 781 case ssa.OpPPC64MOVDconst: 782 p := s.Prog(v.Op.Asm()) 783 p.From.Type = obj.TYPE_CONST 784 p.From.Offset = v.AuxInt 785 p.To.Type = obj.TYPE_REG 786 p.To.Reg = v.Reg() 787 788 case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst: 789 p := s.Prog(v.Op.Asm()) 790 p.From.Type = obj.TYPE_FCONST 791 p.From.Val = math.Float64frombits(uint64(v.AuxInt)) 792 p.To.Type = obj.TYPE_REG 793 p.To.Reg = v.Reg() 794 795 case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU: 796 p := s.Prog(v.Op.Asm()) 797 p.From.Type = obj.TYPE_REG 798 p.From.Reg = v.Args[0].Reg() 799 p.To.Type = obj.TYPE_REG 800 p.To.Reg = v.Args[1].Reg() 801 802 case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst: 803 p := s.Prog(v.Op.Asm()) 804 p.From.Type = obj.TYPE_REG 805 p.From.Reg = v.Args[0].Reg() 806 p.To.Type = obj.TYPE_CONST 807 p.To.Offset = v.AuxInt 808 809 case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg: 810 // Shift in register to required size 811 p := s.Prog(v.Op.Asm()) 812 p.From.Type = obj.TYPE_REG 813 p.From.Reg = v.Args[0].Reg() 814 p.To.Reg = v.Reg() 815 p.To.Type = obj.TYPE_REG 816 817 case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload: 818 819 // MOVDload and MOVWload are DS form instructions that are restricted to 820 // offsets that are a multiple of 4. If the offset is not a multiple of 4, 821 // then the address of the symbol to be loaded is computed (base + offset) 822 // and used as the new base register and the offset field in the instruction 823 // can be set to zero. 824 825 // This same problem can happen with gostrings since the final offset is not 826 // known yet, but could be unaligned after the relocation is resolved. 827 // So gostrings are handled the same way. 828 829 // This allows the MOVDload and MOVWload to be generated in more cases and 830 // eliminates some offset and alignment checking in the rules file. 
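		// For example (registers and symbol chosen purely for illustration),
		// a load whose final offset is not a multiple of 4, such as
		//	MOVD	x+6(R3), R4
		// cannot be encoded as a single DS-form load, so it is emitted as
		//	MOVD	$x+6(R3), R31	// full address into REGTMP (R31)
		//	MOVD	(R31), R4
		// leaving the offset field of the actual load set to zero.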
831 832 fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()} 833 ssagen.AddAux(&fromAddr, v) 834 835 genAddr := false 836 837 switch fromAddr.Name { 838 case obj.NAME_EXTERN, obj.NAME_STATIC: 839 // Special case for a rule combines the bytes of gostring. 840 // The v alignment might seem OK, but we don't want to load it 841 // using an offset because relocation comes later. 842 genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go:string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0 843 default: 844 genAddr = fromAddr.Offset%4 != 0 845 } 846 if genAddr { 847 // Load full address into the temp register. 848 p := s.Prog(ppc64.AMOVD) 849 p.From.Type = obj.TYPE_ADDR 850 p.From.Reg = v.Args[0].Reg() 851 ssagen.AddAux(&p.From, v) 852 // Load target using temp as base register 853 // and offset zero. Setting NAME_NONE 854 // prevents any extra offsets from being 855 // added. 856 p.To.Type = obj.TYPE_REG 857 p.To.Reg = ppc64.REGTMP 858 fromAddr.Reg = ppc64.REGTMP 859 // Clear the offset field and other 860 // information that might be used 861 // by the assembler to add to the 862 // final offset value. 863 fromAddr.Offset = 0 864 fromAddr.Name = obj.NAME_NONE 865 fromAddr.Sym = nil 866 } 867 p := s.Prog(v.Op.Asm()) 868 p.From = fromAddr 869 p.To.Type = obj.TYPE_REG 870 p.To.Reg = v.Reg() 871 872 case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload: 873 p := s.Prog(v.Op.Asm()) 874 p.From.Type = obj.TYPE_MEM 875 p.From.Reg = v.Args[0].Reg() 876 ssagen.AddAux(&p.From, v) 877 p.To.Type = obj.TYPE_REG 878 p.To.Reg = v.Reg() 879 880 case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload: 881 p := s.Prog(v.Op.Asm()) 882 p.From.Type = obj.TYPE_MEM 883 p.From.Reg = v.Args[0].Reg() 884 p.To.Type = obj.TYPE_REG 885 p.To.Reg = v.Reg() 886 887 case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore: 888 p := s.Prog(v.Op.Asm()) 889 p.To.Type = obj.TYPE_MEM 890 p.To.Reg = v.Args[0].Reg() 891 p.From.Type = obj.TYPE_REG 892 p.From.Reg = v.Args[1].Reg() 893 894 case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx, 895 ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx, 896 ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx: 897 p := s.Prog(v.Op.Asm()) 898 p.From.Type = obj.TYPE_MEM 899 p.From.Reg = v.Args[0].Reg() 900 p.From.Index = v.Args[1].Reg() 901 p.To.Type = obj.TYPE_REG 902 p.To.Reg = v.Reg() 903 904 case ssa.OpPPC64DCBT: 905 p := s.Prog(v.Op.Asm()) 906 p.From.Type = obj.TYPE_MEM 907 p.From.Reg = v.Args[0].Reg() 908 p.To.Type = obj.TYPE_CONST 909 p.To.Offset = v.AuxInt 910 911 case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero: 912 p := s.Prog(v.Op.Asm()) 913 p.From.Type = obj.TYPE_REG 914 p.From.Reg = ppc64.REGZERO 915 p.To.Type = obj.TYPE_MEM 916 p.To.Reg = v.Args[0].Reg() 917 ssagen.AddAux(&p.To, v) 918 919 case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero: 920 921 // MOVDstore and MOVDstorezero become DS form instructions that are restricted 922 // to offset values that are a multiple of 4. If the offset field is not a 923 // multiple of 4, then the full address of the store target is computed (base + 924 // offset) and used as the new base register and the offset in the instruction 925 // is set to 0. 
926 927 // This allows the MOVDstore and MOVDstorezero to be generated in more cases, 928 // and prevents checking of the offset value and alignment in the rules. 929 930 toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()} 931 ssagen.AddAux(&toAddr, v) 932 933 if toAddr.Offset%4 != 0 { 934 p := s.Prog(ppc64.AMOVD) 935 p.From.Type = obj.TYPE_ADDR 936 p.From.Reg = v.Args[0].Reg() 937 ssagen.AddAux(&p.From, v) 938 p.To.Type = obj.TYPE_REG 939 p.To.Reg = ppc64.REGTMP 940 toAddr.Reg = ppc64.REGTMP 941 // Clear the offset field and other 942 // information that might be used 943 // by the assembler to add to the 944 // final offset value. 945 toAddr.Offset = 0 946 toAddr.Name = obj.NAME_NONE 947 toAddr.Sym = nil 948 } 949 p := s.Prog(v.Op.Asm()) 950 p.To = toAddr 951 p.From.Type = obj.TYPE_REG 952 if v.Op == ssa.OpPPC64MOVDstorezero { 953 p.From.Reg = ppc64.REGZERO 954 } else { 955 p.From.Reg = v.Args[1].Reg() 956 } 957 958 case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore: 959 p := s.Prog(v.Op.Asm()) 960 p.From.Type = obj.TYPE_REG 961 p.From.Reg = v.Args[1].Reg() 962 p.To.Type = obj.TYPE_MEM 963 p.To.Reg = v.Args[0].Reg() 964 ssagen.AddAux(&p.To, v) 965 966 case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx, 967 ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx, 968 ssa.OpPPC64MOVHBRstoreidx: 969 p := s.Prog(v.Op.Asm()) 970 p.From.Type = obj.TYPE_REG 971 p.From.Reg = v.Args[2].Reg() 972 p.To.Index = v.Args[1].Reg() 973 p.To.Type = obj.TYPE_MEM 974 p.To.Reg = v.Args[0].Reg() 975 976 case ssa.OpPPC64ISEL, ssa.OpPPC64ISELZ: 977 // ISEL AuxInt ? arg0 : arg1 978 // ISELZ is a special case of ISEL where arg1 is implicitly $0. 979 // 980 // AuxInt value indicates conditions 0=LT 1=GT 2=EQ 3=SO 4=GE 5=LE 6=NE 7=NSO. 981 // ISEL accepts a CR bit argument, not a condition as expressed by AuxInt. 982 // Convert the condition to a CR bit argument by the following conversion: 983 // 984 // AuxInt&3 ? arg0 : arg1 for conditions LT, GT, EQ, SO 985 // AuxInt&3 ? arg1 : arg0 for conditions GE, LE, NE, NSO 986 p := s.Prog(v.Op.Asm()) 987 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()} 988 p.Reg = v.Args[0].Reg() 989 if v.Op == ssa.OpPPC64ISEL { 990 p.AddRestSourceReg(v.Args[1].Reg()) 991 } else { 992 p.AddRestSourceReg(ppc64.REG_R0) 993 } 994 // AuxInt values 4,5,6 implemented with reverse operand order from 0,1,2 995 if v.AuxInt > 3 { 996 p.Reg, p.GetFrom3().Reg = p.GetFrom3().Reg, p.Reg 997 } 998 p.From.SetConst(v.AuxInt & 3) 999 1000 case ssa.OpPPC64SETBC, ssa.OpPPC64SETBCR: 1001 p := s.Prog(v.Op.Asm()) 1002 p.To.Type = obj.TYPE_REG 1003 p.To.Reg = v.Reg() 1004 p.From.Type = obj.TYPE_REG 1005 p.From.Reg = int16(ppc64.REG_CR0LT + v.AuxInt) 1006 1007 case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort: 1008 // The LoweredQuad code generation 1009 // generates STXV instructions on 1010 // power9. The Short variation is used 1011 // if no loop is generated. 
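		// (The Short variation is chosen by the lowering rules for sizes small
		// enough that ctr below is at most 1, so no loop is emitted for it.)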
1012 1013 // sizes >= 64 generate a loop as follows: 1014 1015 // Set up loop counter in CTR, used by BC 1016 // XXLXOR clears VS32 1017 // XXLXOR VS32,VS32,VS32 1018 // MOVD len/64,REG_TMP 1019 // MOVD REG_TMP,CTR 1020 // loop: 1021 // STXV VS32,0(R20) 1022 // STXV VS32,16(R20) 1023 // STXV VS32,32(R20) 1024 // STXV VS32,48(R20) 1025 // ADD $64,R20 1026 // BC 16, 0, loop 1027 1028 // Bytes per iteration 1029 ctr := v.AuxInt / 64 1030 1031 // Remainder bytes 1032 rem := v.AuxInt % 64 1033 1034 // Only generate a loop if there is more 1035 // than 1 iteration. 1036 if ctr > 1 { 1037 // Set up VS32 (V0) to hold 0s 1038 p := s.Prog(ppc64.AXXLXOR) 1039 p.From.Type = obj.TYPE_REG 1040 p.From.Reg = ppc64.REG_VS32 1041 p.To.Type = obj.TYPE_REG 1042 p.To.Reg = ppc64.REG_VS32 1043 p.Reg = ppc64.REG_VS32 1044 1045 // Set up CTR loop counter 1046 p = s.Prog(ppc64.AMOVD) 1047 p.From.Type = obj.TYPE_CONST 1048 p.From.Offset = ctr 1049 p.To.Type = obj.TYPE_REG 1050 p.To.Reg = ppc64.REGTMP 1051 1052 p = s.Prog(ppc64.AMOVD) 1053 p.From.Type = obj.TYPE_REG 1054 p.From.Reg = ppc64.REGTMP 1055 p.To.Type = obj.TYPE_REG 1056 p.To.Reg = ppc64.REG_CTR 1057 1058 // Don't generate padding for 1059 // loops with few iterations. 1060 if ctr > 3 { 1061 p = s.Prog(obj.APCALIGN) 1062 p.From.Type = obj.TYPE_CONST 1063 p.From.Offset = 16 1064 } 1065 1066 // generate 4 STXVs to zero 64 bytes 1067 var top *obj.Prog 1068 1069 p = s.Prog(ppc64.ASTXV) 1070 p.From.Type = obj.TYPE_REG 1071 p.From.Reg = ppc64.REG_VS32 1072 p.To.Type = obj.TYPE_MEM 1073 p.To.Reg = v.Args[0].Reg() 1074 1075 // Save the top of loop 1076 if top == nil { 1077 top = p 1078 } 1079 p = s.Prog(ppc64.ASTXV) 1080 p.From.Type = obj.TYPE_REG 1081 p.From.Reg = ppc64.REG_VS32 1082 p.To.Type = obj.TYPE_MEM 1083 p.To.Reg = v.Args[0].Reg() 1084 p.To.Offset = 16 1085 1086 p = s.Prog(ppc64.ASTXV) 1087 p.From.Type = obj.TYPE_REG 1088 p.From.Reg = ppc64.REG_VS32 1089 p.To.Type = obj.TYPE_MEM 1090 p.To.Reg = v.Args[0].Reg() 1091 p.To.Offset = 32 1092 1093 p = s.Prog(ppc64.ASTXV) 1094 p.From.Type = obj.TYPE_REG 1095 p.From.Reg = ppc64.REG_VS32 1096 p.To.Type = obj.TYPE_MEM 1097 p.To.Reg = v.Args[0].Reg() 1098 p.To.Offset = 48 1099 1100 // Increment address for the 1101 // 64 bytes just zeroed. 1102 p = s.Prog(ppc64.AADD) 1103 p.Reg = v.Args[0].Reg() 1104 p.From.Type = obj.TYPE_CONST 1105 p.From.Offset = 64 1106 p.To.Type = obj.TYPE_REG 1107 p.To.Reg = v.Args[0].Reg() 1108 1109 // Branch back to top of loop 1110 // based on CTR 1111 // BC with BO_BCTR generates bdnz 1112 p = s.Prog(ppc64.ABC) 1113 p.From.Type = obj.TYPE_CONST 1114 p.From.Offset = ppc64.BO_BCTR 1115 p.Reg = ppc64.REG_CR0LT 1116 p.To.Type = obj.TYPE_BRANCH 1117 p.To.SetTarget(top) 1118 } 1119 // When ctr == 1 the loop was not generated but 1120 // there are at least 64 bytes to clear, so add 1121 // that to the remainder to generate the code 1122 // to clear those doublewords 1123 if ctr == 1 { 1124 rem += 64 1125 } 1126 1127 // Clear the remainder starting at offset zero 1128 offset := int64(0) 1129 1130 if rem >= 16 && ctr <= 1 { 1131 // If the XXLXOR hasn't already been 1132 // generated, do it here to initialize 1133 // VS32 (V0) to 0. 1134 p := s.Prog(ppc64.AXXLXOR) 1135 p.From.Type = obj.TYPE_REG 1136 p.From.Reg = ppc64.REG_VS32 1137 p.To.Type = obj.TYPE_REG 1138 p.To.Reg = ppc64.REG_VS32 1139 p.Reg = ppc64.REG_VS32 1140 } 1141 // Generate STXV for 32 or 64 1142 // bytes. 
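		// Each STXV clears 16 bytes, so the loop below emits them in pairs
		// while at least 32 bytes remain, and a single STXV then handles any
		// remaining 16 byte chunk.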
1143 for rem >= 32 { 1144 p := s.Prog(ppc64.ASTXV) 1145 p.From.Type = obj.TYPE_REG 1146 p.From.Reg = ppc64.REG_VS32 1147 p.To.Type = obj.TYPE_MEM 1148 p.To.Reg = v.Args[0].Reg() 1149 p.To.Offset = offset 1150 1151 p = s.Prog(ppc64.ASTXV) 1152 p.From.Type = obj.TYPE_REG 1153 p.From.Reg = ppc64.REG_VS32 1154 p.To.Type = obj.TYPE_MEM 1155 p.To.Reg = v.Args[0].Reg() 1156 p.To.Offset = offset + 16 1157 offset += 32 1158 rem -= 32 1159 } 1160 // Generate 16 bytes 1161 if rem >= 16 { 1162 p := s.Prog(ppc64.ASTXV) 1163 p.From.Type = obj.TYPE_REG 1164 p.From.Reg = ppc64.REG_VS32 1165 p.To.Type = obj.TYPE_MEM 1166 p.To.Reg = v.Args[0].Reg() 1167 p.To.Offset = offset 1168 offset += 16 1169 rem -= 16 1170 } 1171 1172 // first clear as many doublewords as possible 1173 // then clear remaining sizes as available 1174 for rem > 0 { 1175 op, size := ppc64.AMOVB, int64(1) 1176 switch { 1177 case rem >= 8: 1178 op, size = ppc64.AMOVD, 8 1179 case rem >= 4: 1180 op, size = ppc64.AMOVW, 4 1181 case rem >= 2: 1182 op, size = ppc64.AMOVH, 2 1183 } 1184 p := s.Prog(op) 1185 p.From.Type = obj.TYPE_REG 1186 p.From.Reg = ppc64.REG_R0 1187 p.To.Type = obj.TYPE_MEM 1188 p.To.Reg = v.Args[0].Reg() 1189 p.To.Offset = offset 1190 rem -= size 1191 offset += size 1192 } 1193 1194 case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort: 1195 1196 // Unaligned data doesn't hurt performance 1197 // for these instructions on power8. 1198 1199 // For sizes >= 64 generate a loop as follows: 1200 1201 // Set up loop counter in CTR, used by BC 1202 // XXLXOR VS32,VS32,VS32 1203 // MOVD len/32,REG_TMP 1204 // MOVD REG_TMP,CTR 1205 // MOVD $16,REG_TMP 1206 // loop: 1207 // STXVD2X VS32,(R0)(R20) 1208 // STXVD2X VS32,(R31)(R20) 1209 // ADD $32,R20 1210 // BC 16, 0, loop 1211 // 1212 // any remainder is done as described below 1213 1214 // for sizes < 64 bytes, first clear as many doublewords as possible, 1215 // then handle the remainder 1216 // MOVD R0,(R20) 1217 // MOVD R0,8(R20) 1218 // .... etc. 1219 // 1220 // the remainder bytes are cleared using one or more 1221 // of the following instructions with the appropriate 1222 // offsets depending which instructions are needed 1223 // 1224 // MOVW R0,n1(R20) 4 bytes 1225 // MOVH R0,n2(R20) 2 bytes 1226 // MOVB R0,n3(R20) 1 byte 1227 // 1228 // 7 bytes: MOVW, MOVH, MOVB 1229 // 6 bytes: MOVW, MOVH 1230 // 5 bytes: MOVW, MOVB 1231 // 3 bytes: MOVH, MOVB 1232 1233 // each loop iteration does 32 bytes 1234 ctr := v.AuxInt / 32 1235 1236 // remainder bytes 1237 rem := v.AuxInt % 32 1238 1239 // only generate a loop if there is more 1240 // than 1 iteration. 1241 if ctr > 1 { 1242 // Set up VS32 (V0) to hold 0s 1243 p := s.Prog(ppc64.AXXLXOR) 1244 p.From.Type = obj.TYPE_REG 1245 p.From.Reg = ppc64.REG_VS32 1246 p.To.Type = obj.TYPE_REG 1247 p.To.Reg = ppc64.REG_VS32 1248 p.Reg = ppc64.REG_VS32 1249 1250 // Set up CTR loop counter 1251 p = s.Prog(ppc64.AMOVD) 1252 p.From.Type = obj.TYPE_CONST 1253 p.From.Offset = ctr 1254 p.To.Type = obj.TYPE_REG 1255 p.To.Reg = ppc64.REGTMP 1256 1257 p = s.Prog(ppc64.AMOVD) 1258 p.From.Type = obj.TYPE_REG 1259 p.From.Reg = ppc64.REGTMP 1260 p.To.Type = obj.TYPE_REG 1261 p.To.Reg = ppc64.REG_CTR 1262 1263 // Set up R31 to hold index value 16 1264 p = s.Prog(ppc64.AMOVD) 1265 p.From.Type = obj.TYPE_CONST 1266 p.From.Offset = 16 1267 p.To.Type = obj.TYPE_REG 1268 p.To.Reg = ppc64.REGTMP 1269 1270 // Don't add padding for alignment 1271 // with few loop iterations. 
1272 if ctr > 3 { 1273 p = s.Prog(obj.APCALIGN) 1274 p.From.Type = obj.TYPE_CONST 1275 p.From.Offset = 16 1276 } 1277 1278 // generate 2 STXVD2Xs to store 16 bytes 1279 // when this is a loop then the top must be saved 1280 var top *obj.Prog 1281 // This is the top of loop 1282 1283 p = s.Prog(ppc64.ASTXVD2X) 1284 p.From.Type = obj.TYPE_REG 1285 p.From.Reg = ppc64.REG_VS32 1286 p.To.Type = obj.TYPE_MEM 1287 p.To.Reg = v.Args[0].Reg() 1288 p.To.Index = ppc64.REGZERO 1289 // Save the top of loop 1290 if top == nil { 1291 top = p 1292 } 1293 p = s.Prog(ppc64.ASTXVD2X) 1294 p.From.Type = obj.TYPE_REG 1295 p.From.Reg = ppc64.REG_VS32 1296 p.To.Type = obj.TYPE_MEM 1297 p.To.Reg = v.Args[0].Reg() 1298 p.To.Index = ppc64.REGTMP 1299 1300 // Increment address for the 1301 // 4 doublewords just zeroed. 1302 p = s.Prog(ppc64.AADD) 1303 p.Reg = v.Args[0].Reg() 1304 p.From.Type = obj.TYPE_CONST 1305 p.From.Offset = 32 1306 p.To.Type = obj.TYPE_REG 1307 p.To.Reg = v.Args[0].Reg() 1308 1309 // Branch back to top of loop 1310 // based on CTR 1311 // BC with BO_BCTR generates bdnz 1312 p = s.Prog(ppc64.ABC) 1313 p.From.Type = obj.TYPE_CONST 1314 p.From.Offset = ppc64.BO_BCTR 1315 p.Reg = ppc64.REG_CR0LT 1316 p.To.Type = obj.TYPE_BRANCH 1317 p.To.SetTarget(top) 1318 } 1319 1320 // when ctr == 1 the loop was not generated but 1321 // there are at least 32 bytes to clear, so add 1322 // that to the remainder to generate the code 1323 // to clear those doublewords 1324 if ctr == 1 { 1325 rem += 32 1326 } 1327 1328 // clear the remainder starting at offset zero 1329 offset := int64(0) 1330 1331 // first clear as many doublewords as possible 1332 // then clear remaining sizes as available 1333 for rem > 0 { 1334 op, size := ppc64.AMOVB, int64(1) 1335 switch { 1336 case rem >= 8: 1337 op, size = ppc64.AMOVD, 8 1338 case rem >= 4: 1339 op, size = ppc64.AMOVW, 4 1340 case rem >= 2: 1341 op, size = ppc64.AMOVH, 2 1342 } 1343 p := s.Prog(op) 1344 p.From.Type = obj.TYPE_REG 1345 p.From.Reg = ppc64.REG_R0 1346 p.To.Type = obj.TYPE_MEM 1347 p.To.Reg = v.Args[0].Reg() 1348 p.To.Offset = offset 1349 rem -= size 1350 offset += size 1351 } 1352 1353 case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort: 1354 1355 bytesPerLoop := int64(32) 1356 // This will be used when moving more 1357 // than 8 bytes. Moves start with 1358 // as many 8 byte moves as possible, then 1359 // 4, 2, or 1 byte(s) as remaining. This will 1360 // work and be efficient for power8 or later. 1361 // If there are 64 or more bytes, then a 1362 // loop is generated to move 32 bytes and 1363 // update the src and dst addresses on each 1364 // iteration. When < 64 bytes, the appropriate 1365 // number of moves are generated based on the 1366 // size. 1367 // When moving >= 64 bytes a loop is used 1368 // MOVD len/32,REG_TMP 1369 // MOVD REG_TMP,CTR 1370 // MOVD $16,REG_TMP 1371 // top: 1372 // LXVD2X (R0)(R21),VS32 1373 // LXVD2X (R31)(R21),VS33 1374 // ADD $32,R21 1375 // STXVD2X VS32,(R0)(R20) 1376 // STXVD2X VS33,(R31)(R20) 1377 // ADD $32,R20 1378 // BC 16,0,top 1379 // Bytes not moved by this loop are moved 1380 // with a combination of the following instructions, 1381 // starting with the largest sizes and generating as 1382 // many as needed, using the appropriate offset value. 
1383 // MOVD n(R21),R31 1384 // MOVD R31,n(R20) 1385 // MOVW n1(R21),R31 1386 // MOVW R31,n1(R20) 1387 // MOVH n2(R21),R31 1388 // MOVH R31,n2(R20) 1389 // MOVB n3(R21),R31 1390 // MOVB R31,n3(R20) 1391 1392 // Each loop iteration moves 32 bytes 1393 ctr := v.AuxInt / bytesPerLoop 1394 1395 // Remainder after the loop 1396 rem := v.AuxInt % bytesPerLoop 1397 1398 dstReg := v.Args[0].Reg() 1399 srcReg := v.Args[1].Reg() 1400 1401 // The set of registers used here, must match the clobbered reg list 1402 // in PPC64Ops.go. 1403 offset := int64(0) 1404 1405 // top of the loop 1406 var top *obj.Prog 1407 // Only generate looping code when loop counter is > 1 for >= 64 bytes 1408 if ctr > 1 { 1409 // Set up the CTR 1410 p := s.Prog(ppc64.AMOVD) 1411 p.From.Type = obj.TYPE_CONST 1412 p.From.Offset = ctr 1413 p.To.Type = obj.TYPE_REG 1414 p.To.Reg = ppc64.REGTMP 1415 1416 p = s.Prog(ppc64.AMOVD) 1417 p.From.Type = obj.TYPE_REG 1418 p.From.Reg = ppc64.REGTMP 1419 p.To.Type = obj.TYPE_REG 1420 p.To.Reg = ppc64.REG_CTR 1421 1422 // Use REGTMP as index reg 1423 p = s.Prog(ppc64.AMOVD) 1424 p.From.Type = obj.TYPE_CONST 1425 p.From.Offset = 16 1426 p.To.Type = obj.TYPE_REG 1427 p.To.Reg = ppc64.REGTMP 1428 1429 // Don't adding padding for 1430 // alignment with small iteration 1431 // counts. 1432 if ctr > 3 { 1433 p = s.Prog(obj.APCALIGN) 1434 p.From.Type = obj.TYPE_CONST 1435 p.From.Offset = 16 1436 } 1437 1438 // Generate 16 byte loads and stores. 1439 // Use temp register for index (16) 1440 // on the second one. 1441 1442 p = s.Prog(ppc64.ALXVD2X) 1443 p.From.Type = obj.TYPE_MEM 1444 p.From.Reg = srcReg 1445 p.From.Index = ppc64.REGZERO 1446 p.To.Type = obj.TYPE_REG 1447 p.To.Reg = ppc64.REG_VS32 1448 if top == nil { 1449 top = p 1450 } 1451 p = s.Prog(ppc64.ALXVD2X) 1452 p.From.Type = obj.TYPE_MEM 1453 p.From.Reg = srcReg 1454 p.From.Index = ppc64.REGTMP 1455 p.To.Type = obj.TYPE_REG 1456 p.To.Reg = ppc64.REG_VS33 1457 1458 // increment the src reg for next iteration 1459 p = s.Prog(ppc64.AADD) 1460 p.Reg = srcReg 1461 p.From.Type = obj.TYPE_CONST 1462 p.From.Offset = bytesPerLoop 1463 p.To.Type = obj.TYPE_REG 1464 p.To.Reg = srcReg 1465 1466 // generate 16 byte stores 1467 p = s.Prog(ppc64.ASTXVD2X) 1468 p.From.Type = obj.TYPE_REG 1469 p.From.Reg = ppc64.REG_VS32 1470 p.To.Type = obj.TYPE_MEM 1471 p.To.Reg = dstReg 1472 p.To.Index = ppc64.REGZERO 1473 1474 p = s.Prog(ppc64.ASTXVD2X) 1475 p.From.Type = obj.TYPE_REG 1476 p.From.Reg = ppc64.REG_VS33 1477 p.To.Type = obj.TYPE_MEM 1478 p.To.Reg = dstReg 1479 p.To.Index = ppc64.REGTMP 1480 1481 // increment the dst reg for next iteration 1482 p = s.Prog(ppc64.AADD) 1483 p.Reg = dstReg 1484 p.From.Type = obj.TYPE_CONST 1485 p.From.Offset = bytesPerLoop 1486 p.To.Type = obj.TYPE_REG 1487 p.To.Reg = dstReg 1488 1489 // BC with BO_BCTR generates bdnz to branch on nonzero CTR 1490 // to loop top. 1491 p = s.Prog(ppc64.ABC) 1492 p.From.Type = obj.TYPE_CONST 1493 p.From.Offset = ppc64.BO_BCTR 1494 p.Reg = ppc64.REG_CR0LT 1495 p.To.Type = obj.TYPE_BRANCH 1496 p.To.SetTarget(top) 1497 1498 // srcReg and dstReg were incremented in the loop, so 1499 // later instructions start with offset 0. 1500 offset = int64(0) 1501 } 1502 1503 // No loop was generated for one iteration, so 1504 // add 32 bytes to the remainder to move those bytes. 1505 if ctr == 1 { 1506 rem += bytesPerLoop 1507 } 1508 1509 if rem >= 16 { 1510 // Generate 16 byte loads and stores. 1511 // Use temp register for index (value 16) 1512 // on the second one. 
1513 p := s.Prog(ppc64.ALXVD2X) 1514 p.From.Type = obj.TYPE_MEM 1515 p.From.Reg = srcReg 1516 p.From.Index = ppc64.REGZERO 1517 p.To.Type = obj.TYPE_REG 1518 p.To.Reg = ppc64.REG_VS32 1519 1520 p = s.Prog(ppc64.ASTXVD2X) 1521 p.From.Type = obj.TYPE_REG 1522 p.From.Reg = ppc64.REG_VS32 1523 p.To.Type = obj.TYPE_MEM 1524 p.To.Reg = dstReg 1525 p.To.Index = ppc64.REGZERO 1526 1527 offset = 16 1528 rem -= 16 1529 1530 if rem >= 16 { 1531 // Use REGTMP as index reg 1532 p := s.Prog(ppc64.AMOVD) 1533 p.From.Type = obj.TYPE_CONST 1534 p.From.Offset = 16 1535 p.To.Type = obj.TYPE_REG 1536 p.To.Reg = ppc64.REGTMP 1537 1538 p = s.Prog(ppc64.ALXVD2X) 1539 p.From.Type = obj.TYPE_MEM 1540 p.From.Reg = srcReg 1541 p.From.Index = ppc64.REGTMP 1542 p.To.Type = obj.TYPE_REG 1543 p.To.Reg = ppc64.REG_VS32 1544 1545 p = s.Prog(ppc64.ASTXVD2X) 1546 p.From.Type = obj.TYPE_REG 1547 p.From.Reg = ppc64.REG_VS32 1548 p.To.Type = obj.TYPE_MEM 1549 p.To.Reg = dstReg 1550 p.To.Index = ppc64.REGTMP 1551 1552 offset = 32 1553 rem -= 16 1554 } 1555 } 1556 1557 // Generate all the remaining load and store pairs, starting with 1558 // as many 8 byte moves as possible, then 4, 2, 1. 1559 for rem > 0 { 1560 op, size := ppc64.AMOVB, int64(1) 1561 switch { 1562 case rem >= 8: 1563 op, size = ppc64.AMOVD, 8 1564 case rem >= 4: 1565 op, size = ppc64.AMOVWZ, 4 1566 case rem >= 2: 1567 op, size = ppc64.AMOVH, 2 1568 } 1569 // Load 1570 p := s.Prog(op) 1571 p.To.Type = obj.TYPE_REG 1572 p.To.Reg = ppc64.REGTMP 1573 p.From.Type = obj.TYPE_MEM 1574 p.From.Reg = srcReg 1575 p.From.Offset = offset 1576 1577 // Store 1578 p = s.Prog(op) 1579 p.From.Type = obj.TYPE_REG 1580 p.From.Reg = ppc64.REGTMP 1581 p.To.Type = obj.TYPE_MEM 1582 p.To.Reg = dstReg 1583 p.To.Offset = offset 1584 rem -= size 1585 offset += size 1586 } 1587 1588 case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort: 1589 bytesPerLoop := int64(64) 1590 // This is used when moving more 1591 // than 8 bytes on power9. Moves start with 1592 // as many 8 byte moves as possible, then 1593 // 4, 2, or 1 byte(s) as remaining. This will 1594 // work and be efficient for power8 or later. 1595 // If there are 64 or more bytes, then a 1596 // loop is generated to move 32 bytes and 1597 // update the src and dst addresses on each 1598 // iteration. When < 64 bytes, the appropriate 1599 // number of moves are generated based on the 1600 // size. 1601 // When moving >= 64 bytes a loop is used 1602 // MOVD len/32,REG_TMP 1603 // MOVD REG_TMP,CTR 1604 // top: 1605 // LXV 0(R21),VS32 1606 // LXV 16(R21),VS33 1607 // ADD $32,R21 1608 // STXV VS32,0(R20) 1609 // STXV VS33,16(R20) 1610 // ADD $32,R20 1611 // BC 16,0,top 1612 // Bytes not moved by this loop are moved 1613 // with a combination of the following instructions, 1614 // starting with the largest sizes and generating as 1615 // many as needed, using the appropriate offset value. 
1616 // MOVD n(R21),R31 1617 // MOVD R31,n(R20) 1618 // MOVW n1(R21),R31 1619 // MOVW R31,n1(R20) 1620 // MOVH n2(R21),R31 1621 // MOVH R31,n2(R20) 1622 // MOVB n3(R21),R31 1623 // MOVB R31,n3(R20) 1624 1625 // Each loop iteration moves 32 bytes 1626 ctr := v.AuxInt / bytesPerLoop 1627 1628 // Remainder after the loop 1629 rem := v.AuxInt % bytesPerLoop 1630 1631 dstReg := v.Args[0].Reg() 1632 srcReg := v.Args[1].Reg() 1633 1634 offset := int64(0) 1635 1636 // top of the loop 1637 var top *obj.Prog 1638 1639 // Only generate looping code when loop counter is > 1 for >= 64 bytes 1640 if ctr > 1 { 1641 // Set up the CTR 1642 p := s.Prog(ppc64.AMOVD) 1643 p.From.Type = obj.TYPE_CONST 1644 p.From.Offset = ctr 1645 p.To.Type = obj.TYPE_REG 1646 p.To.Reg = ppc64.REGTMP 1647 1648 p = s.Prog(ppc64.AMOVD) 1649 p.From.Type = obj.TYPE_REG 1650 p.From.Reg = ppc64.REGTMP 1651 p.To.Type = obj.TYPE_REG 1652 p.To.Reg = ppc64.REG_CTR 1653 1654 p = s.Prog(obj.APCALIGN) 1655 p.From.Type = obj.TYPE_CONST 1656 p.From.Offset = 16 1657 1658 // Generate 16 byte loads and stores. 1659 p = s.Prog(ppc64.ALXV) 1660 p.From.Type = obj.TYPE_MEM 1661 p.From.Reg = srcReg 1662 p.From.Offset = offset 1663 p.To.Type = obj.TYPE_REG 1664 p.To.Reg = ppc64.REG_VS32 1665 if top == nil { 1666 top = p 1667 } 1668 p = s.Prog(ppc64.ALXV) 1669 p.From.Type = obj.TYPE_MEM 1670 p.From.Reg = srcReg 1671 p.From.Offset = offset + 16 1672 p.To.Type = obj.TYPE_REG 1673 p.To.Reg = ppc64.REG_VS33 1674 1675 // generate 16 byte stores 1676 p = s.Prog(ppc64.ASTXV) 1677 p.From.Type = obj.TYPE_REG 1678 p.From.Reg = ppc64.REG_VS32 1679 p.To.Type = obj.TYPE_MEM 1680 p.To.Reg = dstReg 1681 p.To.Offset = offset 1682 1683 p = s.Prog(ppc64.ASTXV) 1684 p.From.Type = obj.TYPE_REG 1685 p.From.Reg = ppc64.REG_VS33 1686 p.To.Type = obj.TYPE_MEM 1687 p.To.Reg = dstReg 1688 p.To.Offset = offset + 16 1689 1690 // Generate 16 byte loads and stores. 1691 p = s.Prog(ppc64.ALXV) 1692 p.From.Type = obj.TYPE_MEM 1693 p.From.Reg = srcReg 1694 p.From.Offset = offset + 32 1695 p.To.Type = obj.TYPE_REG 1696 p.To.Reg = ppc64.REG_VS32 1697 1698 p = s.Prog(ppc64.ALXV) 1699 p.From.Type = obj.TYPE_MEM 1700 p.From.Reg = srcReg 1701 p.From.Offset = offset + 48 1702 p.To.Type = obj.TYPE_REG 1703 p.To.Reg = ppc64.REG_VS33 1704 1705 // generate 16 byte stores 1706 p = s.Prog(ppc64.ASTXV) 1707 p.From.Type = obj.TYPE_REG 1708 p.From.Reg = ppc64.REG_VS32 1709 p.To.Type = obj.TYPE_MEM 1710 p.To.Reg = dstReg 1711 p.To.Offset = offset + 32 1712 1713 p = s.Prog(ppc64.ASTXV) 1714 p.From.Type = obj.TYPE_REG 1715 p.From.Reg = ppc64.REG_VS33 1716 p.To.Type = obj.TYPE_MEM 1717 p.To.Reg = dstReg 1718 p.To.Offset = offset + 48 1719 1720 // increment the src reg for next iteration 1721 p = s.Prog(ppc64.AADD) 1722 p.Reg = srcReg 1723 p.From.Type = obj.TYPE_CONST 1724 p.From.Offset = bytesPerLoop 1725 p.To.Type = obj.TYPE_REG 1726 p.To.Reg = srcReg 1727 1728 // increment the dst reg for next iteration 1729 p = s.Prog(ppc64.AADD) 1730 p.Reg = dstReg 1731 p.From.Type = obj.TYPE_CONST 1732 p.From.Offset = bytesPerLoop 1733 p.To.Type = obj.TYPE_REG 1734 p.To.Reg = dstReg 1735 1736 // BC with BO_BCTR generates bdnz to branch on nonzero CTR 1737 // to loop top. 1738 p = s.Prog(ppc64.ABC) 1739 p.From.Type = obj.TYPE_CONST 1740 p.From.Offset = ppc64.BO_BCTR 1741 p.Reg = ppc64.REG_CR0LT 1742 p.To.Type = obj.TYPE_BRANCH 1743 p.To.SetTarget(top) 1744 1745 // srcReg and dstReg were incremented in the loop, so 1746 // later instructions start with offset 0. 
1747 offset = int64(0) 1748 } 1749 1750 // No loop was generated for one iteration, so 1751 // add 32 bytes to the remainder to move those bytes. 1752 if ctr == 1 { 1753 rem += bytesPerLoop 1754 } 1755 if rem >= 32 { 1756 p := s.Prog(ppc64.ALXV) 1757 p.From.Type = obj.TYPE_MEM 1758 p.From.Reg = srcReg 1759 p.To.Type = obj.TYPE_REG 1760 p.To.Reg = ppc64.REG_VS32 1761 1762 p = s.Prog(ppc64.ALXV) 1763 p.From.Type = obj.TYPE_MEM 1764 p.From.Reg = srcReg 1765 p.From.Offset = 16 1766 p.To.Type = obj.TYPE_REG 1767 p.To.Reg = ppc64.REG_VS33 1768 1769 p = s.Prog(ppc64.ASTXV) 1770 p.From.Type = obj.TYPE_REG 1771 p.From.Reg = ppc64.REG_VS32 1772 p.To.Type = obj.TYPE_MEM 1773 p.To.Reg = dstReg 1774 1775 p = s.Prog(ppc64.ASTXV) 1776 p.From.Type = obj.TYPE_REG 1777 p.From.Reg = ppc64.REG_VS33 1778 p.To.Type = obj.TYPE_MEM 1779 p.To.Reg = dstReg 1780 p.To.Offset = 16 1781 1782 offset = 32 1783 rem -= 32 1784 } 1785 1786 if rem >= 16 { 1787 // Generate 16 byte loads and stores. 1788 p := s.Prog(ppc64.ALXV) 1789 p.From.Type = obj.TYPE_MEM 1790 p.From.Reg = srcReg 1791 p.From.Offset = offset 1792 p.To.Type = obj.TYPE_REG 1793 p.To.Reg = ppc64.REG_VS32 1794 1795 p = s.Prog(ppc64.ASTXV) 1796 p.From.Type = obj.TYPE_REG 1797 p.From.Reg = ppc64.REG_VS32 1798 p.To.Type = obj.TYPE_MEM 1799 p.To.Reg = dstReg 1800 p.To.Offset = offset 1801 1802 offset += 16 1803 rem -= 16 1804 1805 if rem >= 16 { 1806 p := s.Prog(ppc64.ALXV) 1807 p.From.Type = obj.TYPE_MEM 1808 p.From.Reg = srcReg 1809 p.From.Offset = offset 1810 p.To.Type = obj.TYPE_REG 1811 p.To.Reg = ppc64.REG_VS32 1812 1813 p = s.Prog(ppc64.ASTXV) 1814 p.From.Type = obj.TYPE_REG 1815 p.From.Reg = ppc64.REG_VS32 1816 p.To.Type = obj.TYPE_MEM 1817 p.To.Reg = dstReg 1818 p.To.Offset = offset 1819 1820 offset += 16 1821 rem -= 16 1822 } 1823 } 1824 // Generate all the remaining load and store pairs, starting with 1825 // as many 8 byte moves as possible, then 4, 2, 1. 1826 for rem > 0 { 1827 op, size := ppc64.AMOVB, int64(1) 1828 switch { 1829 case rem >= 8: 1830 op, size = ppc64.AMOVD, 8 1831 case rem >= 4: 1832 op, size = ppc64.AMOVWZ, 4 1833 case rem >= 2: 1834 op, size = ppc64.AMOVH, 2 1835 } 1836 // Load 1837 p := s.Prog(op) 1838 p.To.Type = obj.TYPE_REG 1839 p.To.Reg = ppc64.REGTMP 1840 p.From.Type = obj.TYPE_MEM 1841 p.From.Reg = srcReg 1842 p.From.Offset = offset 1843 1844 // Store 1845 p = s.Prog(op) 1846 p.From.Type = obj.TYPE_REG 1847 p.From.Reg = ppc64.REGTMP 1848 p.To.Type = obj.TYPE_MEM 1849 p.To.Reg = dstReg 1850 p.To.Offset = offset 1851 rem -= size 1852 offset += size 1853 } 1854 1855 case ssa.OpPPC64CALLstatic: 1856 s.Call(v) 1857 1858 case ssa.OpPPC64CALLtail: 1859 s.TailCall(v) 1860 1861 case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter: 1862 p := s.Prog(ppc64.AMOVD) 1863 p.From.Type = obj.TYPE_REG 1864 p.From.Reg = v.Args[0].Reg() 1865 p.To.Type = obj.TYPE_REG 1866 p.To.Reg = ppc64.REG_LR 1867 1868 if v.Args[0].Reg() != ppc64.REG_R12 { 1869 v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg) 1870 } 1871 1872 pp := s.Call(v) 1873 1874 // Convert the call into a blrl with hint this is not a subroutine return. 1875 // The full bclrl opcode must be specified when passing a hint. 1876 pp.As = ppc64.ABCL 1877 pp.From.Type = obj.TYPE_CONST 1878 pp.From.Offset = ppc64.BO_ALWAYS 1879 pp.Reg = ppc64.REG_CR0LT // The preferred value if BI is ignored. 
1880 pp.To.Reg = ppc64.REG_LR 1881 pp.AddRestSourceConst(1) 1882 1883 if ppc64.NeedTOCpointer(base.Ctxt) { 1884 // When compiling Go into PIC, the function we just 1885 // called via pointer might have been implemented in 1886 // a separate module and so overwritten the TOC 1887 // pointer in R2; reload it. 1888 q := s.Prog(ppc64.AMOVD) 1889 q.From.Type = obj.TYPE_MEM 1890 q.From.Offset = 24 1891 q.From.Reg = ppc64.REGSP 1892 q.To.Type = obj.TYPE_REG 1893 q.To.Reg = ppc64.REG_R2 1894 } 1895 1896 case ssa.OpPPC64LoweredWB: 1897 p := s.Prog(obj.ACALL) 1898 p.To.Type = obj.TYPE_MEM 1899 p.To.Name = obj.NAME_EXTERN 1900 // AuxInt encodes how many buffer entries we need. 1901 p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1] 1902 1903 case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC: 1904 p := s.Prog(obj.ACALL) 1905 p.To.Type = obj.TYPE_MEM 1906 p.To.Name = obj.NAME_EXTERN 1907 p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt] 1908 s.UseArgs(16) // space used in callee args area by assembly stubs 1909 1910 case ssa.OpPPC64LoweredNilCheck: 1911 if buildcfg.GOOS == "aix" { 1912 // CMP Rarg0, R0 1913 // BNE 2(PC) 1914 // STW R0, 0(R0) 1915 // NOP (so the BNE has somewhere to land) 1916 1917 // CMP Rarg0, R0 1918 p := s.Prog(ppc64.ACMP) 1919 p.From.Type = obj.TYPE_REG 1920 p.From.Reg = v.Args[0].Reg() 1921 p.To.Type = obj.TYPE_REG 1922 p.To.Reg = ppc64.REG_R0 1923 1924 // BNE 2(PC) 1925 p2 := s.Prog(ppc64.ABNE) 1926 p2.To.Type = obj.TYPE_BRANCH 1927 1928 // STW R0, 0(R0) 1929 // Write at 0 is forbidden and will trigger a SIGSEGV 1930 p = s.Prog(ppc64.AMOVW) 1931 p.From.Type = obj.TYPE_REG 1932 p.From.Reg = ppc64.REG_R0 1933 p.To.Type = obj.TYPE_MEM 1934 p.To.Reg = ppc64.REG_R0 1935 1936 // NOP (so the BNE has somewhere to land) 1937 nop := s.Prog(obj.ANOP) 1938 p2.To.SetTarget(nop) 1939 1940 } else { 1941 // Issue a load which will fault if arg is nil. 1942 p := s.Prog(ppc64.AMOVBZ) 1943 p.From.Type = obj.TYPE_MEM 1944 p.From.Reg = v.Args[0].Reg() 1945 ssagen.AddAux(&p.From, v) 1946 p.To.Type = obj.TYPE_REG 1947 p.To.Reg = ppc64.REGTMP 1948 } 1949 if logopt.Enabled() { 1950 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name) 1951 } 1952 if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers 1953 base.WarnfAt(v.Pos, "generated nil check") 1954 } 1955 1956 // These should be resolved by rules and not make it here. 1957 case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan, 1958 ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual, 1959 ssa.OpPPC64FLessEqual, ssa.OpPPC64FGreaterEqual: 1960 v.Fatalf("Pseudo-op should not make it to codegen: %s ###\n", v.LongString()) 1961 case ssa.OpPPC64InvertFlags: 1962 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) 1963 case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT: 1964 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) 1965 case ssa.OpClobber, ssa.OpClobberReg: 1966 // TODO: implement for clobberdead experiment. Nop is ok for now. 
1967 default: 1968 v.Fatalf("genValue not implemented: %s", v.LongString()) 1969 } 1970 } 1971 1972 var blockJump = [...]struct { 1973 asm, invasm obj.As 1974 asmeq, invasmun bool 1975 }{ 1976 ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false}, 1977 ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false}, 1978 1979 ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false}, 1980 ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false}, 1981 ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false}, 1982 ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false}, 1983 1984 // TODO: need to work FP comparisons into block jumps 1985 ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false}, 1986 ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN 1987 ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN 1988 ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false}, 1989 } 1990 1991 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) { 1992 switch b.Kind { 1993 case ssa.BlockDefer: 1994 // defer returns in R3: 1995 // 0 if we should continue executing 1996 // 1 if we should jump to deferreturn call 1997 p := s.Prog(ppc64.ACMP) 1998 p.From.Type = obj.TYPE_REG 1999 p.From.Reg = ppc64.REG_R3 2000 p.To.Type = obj.TYPE_REG 2001 p.To.Reg = ppc64.REG_R0 2002 2003 p = s.Prog(ppc64.ABNE) 2004 p.To.Type = obj.TYPE_BRANCH 2005 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()}) 2006 if b.Succs[0].Block() != next { 2007 p := s.Prog(obj.AJMP) 2008 p.To.Type = obj.TYPE_BRANCH 2009 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()}) 2010 } 2011 2012 case ssa.BlockPlain: 2013 if b.Succs[0].Block() != next { 2014 p := s.Prog(obj.AJMP) 2015 p.To.Type = obj.TYPE_BRANCH 2016 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()}) 2017 } 2018 case ssa.BlockExit, ssa.BlockRetJmp: 2019 case ssa.BlockRet: 2020 s.Prog(obj.ARET) 2021 2022 case ssa.BlockPPC64EQ, ssa.BlockPPC64NE, 2023 ssa.BlockPPC64LT, ssa.BlockPPC64GE, 2024 ssa.BlockPPC64LE, ssa.BlockPPC64GT, 2025 ssa.BlockPPC64FLT, ssa.BlockPPC64FGE, 2026 ssa.BlockPPC64FLE, ssa.BlockPPC64FGT: 2027 jmp := blockJump[b.Kind] 2028 switch next { 2029 case b.Succs[0].Block(): 2030 s.Br(jmp.invasm, b.Succs[1].Block()) 2031 if jmp.invasmun { 2032 // TODO: The second branch is probably predict-not-taken since it is for FP unordered 2033 s.Br(ppc64.ABVS, b.Succs[1].Block()) 2034 } 2035 case b.Succs[1].Block(): 2036 s.Br(jmp.asm, b.Succs[0].Block()) 2037 if jmp.asmeq { 2038 s.Br(ppc64.ABEQ, b.Succs[0].Block()) 2039 } 2040 default: 2041 if b.Likely != ssa.BranchUnlikely { 2042 s.Br(jmp.asm, b.Succs[0].Block()) 2043 if jmp.asmeq { 2044 s.Br(ppc64.ABEQ, b.Succs[0].Block()) 2045 } 2046 s.Br(obj.AJMP, b.Succs[1].Block()) 2047 } else { 2048 s.Br(jmp.invasm, b.Succs[1].Block()) 2049 if jmp.invasmun { 2050 // TODO: The second branch is probably predict-not-taken since it is for FP unordered 2051 s.Br(ppc64.ABVS, b.Succs[1].Block()) 2052 } 2053 s.Br(obj.AJMP, b.Succs[0].Block()) 2054 } 2055 } 2056 default: 2057 b.Fatalf("branch not implemented: %s", b.LongString()) 2058 } 2059 } 2060 2061 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { 2062 p := s.Prog(loadByType(t)) 2063 p.From.Type = obj.TYPE_MEM 2064 p.From.Name = obj.NAME_AUTO 2065 p.From.Sym = n.Linksym() 2066 p.From.Offset = n.FrameOffset() + off 2067 p.To.Type = obj.TYPE_REG 2068 p.To.Reg = reg 2069 return p 2070 } 2071 2072 
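// spillArgReg emits a store after p that spills the register argument reg of
// type t to the stack slot of parameter n at offset off. The spill is marked
// as not being a statement.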
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}