github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/cmd/compile/internal/ppc64/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ppc64

import (
	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/ppc64"
	"cmd/internal/objabi"
	"math"
	"strings"
)

// iselOp encodes mapping of comparison operations onto ISEL operands
type iselOp struct {
	cond        int64
	valueIfCond int // if cond is true, the value to return (0 or 1)
}

// Input registers to ISEL used for comparison. Index 0 is zero, 1 is (will be) 1
var iselRegs = [2]int16{ppc64.REG_R0, ppc64.REGTMP}

var iselOps = map[ssa.Op]iselOp{
	ssa.OpPPC64Equal:         iselOp{cond: ppc64.C_COND_EQ, valueIfCond: 1},
	ssa.OpPPC64NotEqual:      iselOp{cond: ppc64.C_COND_EQ, valueIfCond: 0},
	ssa.OpPPC64LessThan:      iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1},
	ssa.OpPPC64GreaterEqual:  iselOp{cond: ppc64.C_COND_LT, valueIfCond: 0},
	ssa.OpPPC64GreaterThan:   iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1},
	ssa.OpPPC64LessEqual:     iselOp{cond: ppc64.C_COND_GT, valueIfCond: 0},
	ssa.OpPPC64FLessThan:     iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1},
	ssa.OpPPC64FGreaterThan:  iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1},
	ssa.OpPPC64FLessEqual:    iselOp{cond: ppc64.C_COND_LT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
	ssa.OpPPC64FGreaterEqual: iselOp{cond: ppc64.C_COND_GT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
}

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	//	flive := b.FlagsLiveAtEnd
	//	if b.Control != nil && b.Control.Type.IsFlags() {
	//		flive = true
	//	}
	//	for i := len(b.Values) - 1; i >= 0; i-- {
	//		v := b.Values[i]
	//		if flive && (v.Op == v.Op == ssa.OpPPC64MOVDconst) {
	//			// The "mark" is any non-nil Aux value.
	//			v.Aux = v
	//		}
	//		if v.Type.IsFlags() {
	//			flive = false
	//		}
	//		for _, a := range v.Args {
	//			if a.Type.IsFlags() {
	//				flive = true
	//			}
	//		}
	//	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			if t.IsSigned() {
				return ppc64.AMOVB
			} else {
				return ppc64.AMOVBZ
			}
		case 2:
			if t.IsSigned() {
				return ppc64.AMOVH
			} else {
				return ppc64.AMOVHZ
			}
		case 4:
			if t.IsSigned() {
				return ppc64.AMOVW
			} else {
				return ppc64.AMOVWZ
			}
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad load type")
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return ppc64.AFMOVS
		case 8:
			return ppc64.AFMOVD
		}
	} else {
		switch t.Size() {
		case 1:
			return ppc64.AMOVB
		case 2:
			return ppc64.AMOVH
		case 4:
			return ppc64.AMOVW
		case 8:
			return ppc64.AMOVD
		}
	}
	panic("bad store type")
}

func ssaGenISEL(s *gc.SSAGenState, v *ssa.Value, cr int64, r1, r2 int16) {
	r := v.Reg()
	p := s.Prog(ppc64.AISEL)
	p.To.Type = obj.TYPE_REG
	p.To.Reg = r
	p.Reg = r1
	p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = cr
}

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpCopy:
		t := v.Type
		if t.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			rt := obj.TYPE_REG
			op := ppc64.AMOVD

			if t.IsFloat() {
				op = ppc64.AFMOVD
			}
			p := s.Prog(op)
			p.From.Type = rt
			p.From.Reg = x
			p.To.Type = rt
			p.To.Reg = y
		}

	case ssa.OpPPC64LoweredMuluhilo:
		// MULHDU Rarg1, Rarg0, Reg0
		// MULLD Rarg1, Rarg0, Reg1
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		p := s.Prog(ppc64.AMULHDU)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
		p1 := s.Prog(ppc64.AMULLD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.Reg = r0
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = v.Reg1()

	case ssa.OpPPC64LoweredAtomicAnd8,
		ssa.OpPPC64LoweredAtomicOr8:
		// LWSYNC
		// LBAR (Rarg0), Rtmp
		// AND/OR Rarg1, Rtmp
		// STBCCC Rtmp, (Rarg0)
		// BNE -3(PC)
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		p := s.Prog(ppc64.ALBAR)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		p1 := s.Prog(v.Op.Asm())
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = ppc64.REGTMP
		p2 := s.Prog(ppc64.ASTBCCC)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGTMP
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = r0
		p2.RegTo2 = ppc64.REGTMP
		p3 := s.Prog(ppc64.ABNE)
		p3.To.Type = obj.TYPE_BRANCH
		gc.Patch(p3, p)

	case ssa.OpPPC64LoweredAtomicAdd32,
		ssa.OpPPC64LoweredAtomicAdd64:
		// LWSYNC
		// LDAR/LWAR (Rarg0), Rout
		// ADD Rarg1, Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE -3(PC)
		// MOVW Rout,Rout (if Add32)
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// ADD reg1,out
		p1 := s.Prog(ppc64.AADD)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = out
		p1.To.Type = obj.TYPE_REG
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = out
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)

		// Ensure a 32 bit result
		if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
			p5 := s.Prog(ppc64.AMOVWZ)
			p5.To.Type = obj.TYPE_REG
			p5.To.Reg = out
			p5.From.Type = obj.TYPE_REG
			p5.From.Reg = out
		}

	case ssa.OpPPC64LoweredAtomicExchange32,
		ssa.OpPPC64LoweredAtomicExchange64:
		// LWSYNC
		// LDAR/LWAR (Rarg0), Rout
		// STDCCC/STWCCC Rout, (Rarg0)
		// BNE -2(PC)
		// ISYNC
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync := s.Prog(ppc64.ALWSYNC)
		plwsync.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// STDCCC or STWCCC
		p1 := s.Prog(st)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = r0
		// BNE retry
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		gc.Patch(p2, p)
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE

	case ssa.OpPPC64LoweredAtomicLoad32,
		ssa.OpPPC64LoweredAtomicLoad64,
		ssa.OpPPC64LoweredAtomicLoadPtr:
		// SYNC
		// MOVD/MOVW (Rarg0), Rout
		// CMP Rout,Rout
		// BNE 1(PC)
		// ISYNC
		ld := ppc64.AMOVD
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicLoad32 {
			ld = ppc64.AMOVW
			cmp = ppc64.ACMPW
		}
		arg0 := v.Args[0].Reg()
		out := v.Reg0()
		// SYNC when AuxInt == 1; otherwise, load-acquire
		if v.AuxInt == 1 {
			psync := s.Prog(ppc64.ASYNC)
			psync.To.Type = obj.TYPE_NONE
		}
		// Load
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = arg0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = out
		// CMP
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = out
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = out
		// BNE
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// ISYNC
		pisync := s.Prog(ppc64.AISYNC)
		pisync.To.Type = obj.TYPE_NONE
		gc.Patch(p2, pisync)

	case ssa.OpPPC64LoweredAtomicStore32,
		ssa.OpPPC64LoweredAtomicStore64:
		// SYNC or LWSYNC
		// MOVD/MOVW arg1,(arg0)
		st := ppc64.AMOVD
		if v.Op == ssa.OpPPC64LoweredAtomicStore32 {
			st = ppc64.AMOVW
		}
		arg0 := v.Args[0].Reg()
		arg1 := v.Args[1].Reg()
		// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
		// SYNC
		syncOp := ppc64.ASYNC
		if v.AuxInt == 0 {
			syncOp = ppc64.ALWSYNC
		}
		psync := s.Prog(syncOp)
		psync.To.Type = obj.TYPE_NONE
		// Store
		p := s.Prog(st)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arg0
		p.From.Type = obj.TYPE_REG
		p.From.Reg = arg1

	case ssa.OpPPC64LoweredAtomicCas64,
		ssa.OpPPC64LoweredAtomicCas32:
		// LWSYNC
		// loop:
		// LDAR (Rarg0), MutexHint, Rtmp
		// CMP Rarg1, Rtmp
		// BNE fail
		// STDCCC Rarg2, (Rarg0)
		// BNE loop
		// LWSYNC // Only for sequential consistency; not required in CasRel.
		// MOVD $1, Rout
		// BR end
		// fail:
		// MOVD $0, Rout
		// end:
		ld := ppc64.ALDAR
		st := ppc64.ASTDCCC
		cmp := ppc64.ACMP
		if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
			ld = ppc64.ALWAR
			st = ppc64.ASTWCCC
			cmp = ppc64.ACMPW
		}
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		r2 := v.Args[2].Reg()
		out := v.Reg0()
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
		plwsync1 := s.Prog(ppc64.ALWSYNC)
		plwsync1.To.Type = obj.TYPE_NONE
		// LDAR or LWAR
		p := s.Prog(ld)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP
		// If it is a Compare-and-Swap-Release operation, set the EH field with
		// the release hint.
		if v.AuxInt == 0 {
			p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
		}
		// CMP reg1,reg2
		p1 := s.Prog(cmp)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = r1
		p1.To.Reg = ppc64.REGTMP
		p1.To.Type = obj.TYPE_REG
		// BNE cas_fail
		p2 := s.Prog(ppc64.ABNE)
		p2.To.Type = obj.TYPE_BRANCH
		// STDCCC or STWCCC
		p3 := s.Prog(st)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = r2
		p3.To.Type = obj.TYPE_MEM
		p3.To.Reg = r0
		// BNE retry
		p4 := s.Prog(ppc64.ABNE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)
		// LWSYNC - Assuming shared data not write-through-required nor
		// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
		// If the operation is a CAS-Release, then synchronization is not necessary.
		if v.AuxInt != 0 {
			plwsync2 := s.Prog(ppc64.ALWSYNC)
			plwsync2.To.Type = obj.TYPE_NONE
		}
		// return true
		p5 := s.Prog(ppc64.AMOVD)
		p5.From.Type = obj.TYPE_CONST
		p5.From.Offset = 1
		p5.To.Type = obj.TYPE_REG
		p5.To.Reg = out
		// BR done
		p6 := s.Prog(obj.AJMP)
		p6.To.Type = obj.TYPE_BRANCH
		// return false
		p7 := s.Prog(ppc64.AMOVD)
		p7.From.Type = obj.TYPE_CONST
		p7.From.Offset = 0
		p7.To.Type = obj.TYPE_REG
		p7.To.Reg = out
		gc.Patch(p2, p7)
		// done (label)
		p8 := s.Prog(obj.ANOP)
		gc.Patch(p6, p8)

	case ssa.OpPPC64LoweredGetClosurePtr:
		// Closure pointer is R11 (already)
		gc.CheckLoweredGetClosurePtr(v)

	case ssa.OpPPC64LoweredGetCallerSP:
		// caller's SP is FixedFrameSize below the address of the first arg
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize()
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredGetCallerPC:
		p := s.Prog(obj.AGETCALLERPC)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
		// input is already rounded

	case ssa.OpLoadReg:
		loadOp := loadByType(v.Type)
		p := s.Prog(loadOp)
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		storeOp := storeByType(v.Type)
		p := s.Prog(storeOp)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)

	case ssa.OpPPC64DIVD:
		// For now,
		//
		// cmp arg1, -1
		// be ahead
		// v = arg0 / arg1
		// b over
		// ahead: v = - arg0
		// over: nop
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64DIVW:
		// word-width version of above
		r := v.Reg()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()

		p := s.Prog(ppc64.ACMPW)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = -1

		pbahead := s.Prog(ppc64.ABEQ)
		pbahead.To.Type = obj.TYPE_BRANCH

		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

		pbover := s.Prog(obj.AJMP)
		pbover.To.Type = obj.TYPE_BRANCH

		p = s.Prog(ppc64.ANEG)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r0
		gc.Patch(pbahead, p)

		p = s.Prog(obj.ANOP)
		gc.Patch(pbover, p)

	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
		ssa.OpPPC64ROTL,
		ssa.OpPPC64ROTLW,
		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
		ssa.OpPPC64AND, ssa.OpPPC64ANDCC, ssa.OpPPC64OR, ssa.OpPPC64ORCC, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64XORCC, ssa.OpPPC64EQV:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		r3 := v.Args[2].Reg()
		// r = r1*r2 ± r3
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r1
		p.Reg = r3
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64MaskIfNotCarry:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpPPC64ADDconstForCarry:
		r1 := v.Args[0].Reg()
		p := s.Prog(v.Op.Asm())
		p.Reg = r1
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.

	case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, ssa.OpPPC64FROUND:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()

	case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
		ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64ANDCCconst:
		p := s.Prog(v.Op.Asm())
		p.Reg = v.Args[0].Reg()

		if v.Aux != nil {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = gc.AuxOffset(v)
		} else {
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
		}

		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGTMP // discard result

	case ssa.OpPPC64MOVDaddr:
		switch v.Aux.(type) {
		default:
			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
		case nil:
			// If aux offset and aux int are both 0, and the same
			// input and output regs are used, no instruction
			// needs to be generated, since it would just be
			// addi rx, rx, 0.
			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_ADDR
				p.From.Reg = v.Args[0].Reg()
				p.From.Offset = v.AuxInt
				p.To.Type = obj.TYPE_REG
				p.To.Reg = v.Reg()
			}

		case *obj.LSym, *gc.Node:
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			gc.AddAux(&p.From, v)

		}

	case ssa.OpPPC64MOVDconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()

	case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
		// Shift in register to required size
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG

	case ssa.OpPPC64MOVDload:

		// MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
		// For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
		// the offset is not known until link time. If the load of a go.string uses relocation for the
		// offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
		// To avoid this problem, the full address of the go.string is computed and loaded into the base register,
		// and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
		// go.string types because other types will have proper alignment.

		gostring := false
		switch n := v.Aux.(type) {
		case *obj.LSym:
			gostring = strings.HasPrefix(n.Name, "go.string.")
		}
		if gostring {
			// Generate full addr of the go.string const
			// including AuxInt
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_ADDR
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			// Load go.string using 0 offset
			p = s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
			break
		}
		// Not a go.string, generate a normal load
		fallthrough

	case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
		ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
		ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.From.Index = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REGZERO
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
		ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
		ssa.OpPPC64MOVHBRstoreidx:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)

	case ssa.OpPPC64Equal,
		ssa.OpPPC64NotEqual,
		ssa.OpPPC64LessThan,
		ssa.OpPPC64FLessThan,
		ssa.OpPPC64LessEqual,
		ssa.OpPPC64GreaterThan,
		ssa.OpPPC64FGreaterThan,
		ssa.OpPPC64GreaterEqual:

		// On Power7 or later, can use isel instruction:
		// for a < b, a > b, a = b:
		// rtmp := 1
		// isel rt,rtmp,r0,cond // rt is target in ppc asm

		// for a >= b, a <= b, a != b:
		// rtmp := 1
		// isel rt,0,rtmp,!cond // rt is target in ppc asm

		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = iselRegs[1]
		iop := iselOps[v.Op]
		ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])

	case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion
		ssa.OpPPC64FGreaterEqual:

		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = iselRegs[1]
		iop := iselOps[v.Op]
		ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
		ssaGenISEL(s, v, ppc64.C_COND_EQ, iselRegs[1], v.Reg())

	case ssa.OpPPC64LoweredZero:

		// unaligned data doesn't hurt performance
		// for these instructions on power8 or later

		// for sizes >= 64 generate a loop as follows:

		// set up loop counter in CTR, used by BC
		// MOVD len/32,REG_TMP
		// MOVD REG_TMP,CTR
		// loop:
		// MOVD R0,(R3)
		// MOVD R0,8(R3)
		// MOVD R0,16(R3)
		// MOVD R0,24(R3)
		// ADD $32,R3
		// BC 16, 0, loop
		//
		// any remainder is done as described below

		// for sizes < 64 bytes, first clear as many doublewords as possible,
		// then handle the remainder
		// MOVD R0,(R3)
		// MOVD R0,8(R3)
		// .... etc.
		//
		// the remainder bytes are cleared using one or more
		// of the following instructions with the appropriate
		// offsets depending which instructions are needed
		//
		// MOVW R0,n1(R3) 4 bytes
		// MOVH R0,n2(R3) 2 bytes
		// MOVB R0,n3(R3) 1 byte
		//
		// 7 bytes: MOVW, MOVH, MOVB
		// 6 bytes: MOVW, MOVH
		// 5 bytes: MOVW, MOVB
		// 3 bytes: MOVH, MOVB

		// each loop iteration does 32 bytes
		ctr := v.AuxInt / 32

		// remainder bytes
		rem := v.AuxInt % 32

		// only generate a loop if there is more
		// than 1 iteration.
		if ctr > 1 {
			// Set up CTR loop counter
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// generate 4 MOVDs
			// when this is a loop then the top must be saved
			var top *obj.Prog
			for offset := int64(0); offset < 32; offset += 8 {
				// This is the top of loop
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = ppc64.REG_R0
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = v.Args[0].Reg()
				p.To.Offset = offset
				// Save the top of loop
				if top == nil {
					top = p
				}
			}

			// Increment address for the
			// 4 doublewords just zeroed.
			p = s.Prog(ppc64.AADD)
			p.Reg = v.Args[0].Reg()
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Args[0].Reg()

			// Branch back to top of loop
			// based on CTR
			// BC with BO_BCTR generates bdnz
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)
		}

		// when ctr == 1 the loop was not generated but
		// there are at least 32 bytes to clear, so add
		// that to the remainder to generate the code
		// to clear those doublewords
		if ctr == 1 {
			rem += 32
		}

		// clear the remainder starting at offset zero
		offset := int64(0)

		// first clear as many doublewords as possible
		// then clear remaining sizes as available
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			p := s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64LoweredMove:

		// This will be used when moving more
		// than 8 bytes. Moves start with
		// as many 8 byte moves as possible, then
		// 4, 2, or 1 byte(s) as remaining. This will
		// work and be efficient for power8 or later.
		// If there are 64 or more bytes, then a
		// loop is generated to move 32 bytes and
		// update the src and dst addresses on each
		// iteration. When < 64 bytes, the appropriate
		// number of moves are generated based on the
		// size.
		// When moving >= 64 bytes a loop is used
		// MOVD len/32,REG_TMP
		// MOVD REG_TMP,CTR
		// top:
		// MOVD (R4),R7
		// MOVD 8(R4),R8
		// MOVD 16(R4),R9
		// MOVD 24(R4),R10
		// ADD R4,$32
		// MOVD R7,(R3)
		// MOVD R8,8(R3)
		// MOVD R9,16(R3)
		// MOVD R10,24(R3)
		// ADD R3,$32
		// BC 16,0,top
		// Bytes not moved by this loop are moved
		// with a combination of the following instructions,
		// starting with the largest sizes and generating as
		// many as needed, using the appropriate offset value.
		// MOVD n(R4),R7
		// MOVD R7,n(R3)
		// MOVW n1(R4),R7
		// MOVW R7,n1(R3)
		// MOVH n2(R4),R7
		// MOVH R7,n2(R3)
		// MOVB n3(R4),R7
		// MOVB R7,n3(R3)

		// Each loop iteration moves 32 bytes
		ctr := v.AuxInt / 32

		// Remainder after the loop
		rem := v.AuxInt % 32

		dst_reg := v.Args[0].Reg()
		src_reg := v.Args[1].Reg()

		// The set of registers used here must match the clobbered reg list
		// in PPC64Ops.go.
		useregs := []int16{ppc64.REG_R7, ppc64.REG_R8, ppc64.REG_R9, ppc64.REG_R10}
		offset := int64(0)

		// top of the loop
		var top *obj.Prog
		// Only generate looping code when loop counter is > 1 for >= 64 bytes
		if ctr > 1 {
			// Set up the CTR
			p := s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ctr
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP

			p = s.Prog(ppc64.AMOVD)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REGTMP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_CTR

			// Generate all the MOVDs for loads
			// based off the same register, increasing
			// the offset by 8 for each instruction
			for _, rg := range useregs {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = src_reg
				p.From.Offset = offset
				p.To.Type = obj.TYPE_REG
				p.To.Reg = rg
				if top == nil {
					top = p
				}
				offset += 8
			}
			// increment the src_reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = src_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = src_reg

			// generate the MOVDs for stores, based
			// off the same register, using the same
			// offsets as in the loads.
			offset = int64(0)
			for _, rg := range useregs {
				p := s.Prog(ppc64.AMOVD)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = rg
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = dst_reg
				p.To.Offset = offset
				offset += 8
			}
			// increment the dst_reg for next iteration
			p = s.Prog(ppc64.AADD)
			p.Reg = dst_reg
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 32
			p.To.Type = obj.TYPE_REG
			p.To.Reg = dst_reg

			// BC with BO_BCTR generates bdnz to branch on nonzero CTR
			// to loop top.
			p = s.Prog(ppc64.ABC)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = ppc64.BO_BCTR
			p.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_BRANCH
			gc.Patch(p, top)

			// src_reg and dst_reg were incremented in the loop, so
			// later instructions start with offset 0.
			offset = int64(0)
		}

		// No loop was generated for one iteration, so
		// add 32 bytes to the remainder to move those bytes.
		if ctr == 1 {
			rem += 32
		}

		// Generate all the remaining load and store pairs, starting with
		// as many 8 byte moves as possible, then 4, 2, 1.
		for rem > 0 {
			op, size := ppc64.AMOVB, int64(1)
			switch {
			case rem >= 8:
				op, size = ppc64.AMOVD, 8
			case rem >= 4:
				op, size = ppc64.AMOVW, 4
			case rem >= 2:
				op, size = ppc64.AMOVH, 2
			}
			// Load
			p := s.Prog(op)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R7
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = src_reg
			p.From.Offset = offset

			// Store
			p = s.Prog(op)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R7
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = dst_reg
			p.To.Offset = offset
			rem -= size
			offset += size
		}

	case ssa.OpPPC64CALLstatic:
		s.Call(v)

	case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
		p := s.Prog(ppc64.AMOVD)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_CTR

		if v.Args[0].Reg() != ppc64.REG_R12 {
			v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
		}

		pp := s.Call(v)
		pp.To.Reg = ppc64.REG_CTR

		if gc.Ctxt.Flag_shared {
			// When compiling Go into PIC, the function we just
			// called via pointer might have been implemented in
			// a separate module and so overwritten the TOC
			// pointer in R2; reload it.
			q := s.Prog(ppc64.AMOVD)
			q.From.Type = obj.TYPE_MEM
			q.From.Offset = 24
			q.From.Reg = ppc64.REGSP
			q.To.Type = obj.TYPE_REG
			q.To.Reg = ppc64.REG_R2
		}

	case ssa.OpPPC64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.OpPPC64LoweredNilCheck:
		if objabi.GOOS == "aix" {
			// CMP Rarg0, R0
			// BNE 2(PC)
			// STW R0, 0(R0)
			// NOP (so the BNE has somewhere to land)

			// CMP Rarg0, R0
			p := s.Prog(ppc64.ACMP)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Args[0].Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REG_R0

			// BNE 2(PC)
			p2 := s.Prog(ppc64.ABNE)
			p2.To.Type = obj.TYPE_BRANCH

			// STW R0, 0(R0)
			// Write at 0 is forbidden and will trigger a SIGSEGV
			p = s.Prog(ppc64.AMOVW)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = ppc64.REG_R0
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = ppc64.REG_R0

			// NOP (so the BNE has somewhere to land)
			nop := s.Prog(obj.ANOP)
			gc.Patch(p2, nop)

		} else {
			// Issue a load which will fault if arg is nil.
			p := s.Prog(ppc64.AMOVBZ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = v.Args[0].Reg()
			gc.AddAux(&p.From, v)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGTMP
		}
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}

	case ssa.OpPPC64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpClobber:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm     obj.As
	asmeq, invasmun bool
}{
	ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
	ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},

	ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
	ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
	ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},

	// TODO: need to work FP comparisons into block jumps
	ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
	ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
	ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
	ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockDefer:
		// defer returns in R3:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(ppc64.ACMP)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = ppc64.REG_R3
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REG_R0

		p = s.Prog(ppc64.ABNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}

	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
		ssa.BlockPPC64LT, ssa.BlockPPC64GE,
		ssa.BlockPPC64LE, ssa.BlockPPC64GT,
		ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
		ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
			if jmp.invasmun {
				// TODO: The second branch is probably predict-not-taken since it is for FP unordered
				s.Br(ppc64.ABVS, b.Succs[1].Block())
			}
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
			if jmp.asmeq {
				s.Br(ppc64.ABEQ, b.Succs[0].Block())
			}
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				if jmp.asmeq {
					s.Br(ppc64.ABEQ, b.Succs[0].Block())
				}
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				if jmp.invasmun {
					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
					s.Br(ppc64.ABVS, b.Succs[1].Block())
				}
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}
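
// Editor's note (not part of the original file): a hedged sketch of how the
// three hooks above are wired into the generic backend. This package's
// architecture-init code, conventionally in galign.go, registers them on
// gc.Arch; assuming the usual layout of that file, the wiring looks roughly
// like:
//
//	func Init(arch *gc.Arch) {
//		arch.LinkArch = &ppc64.Linkppc64 // or &ppc64.Linkppc64le, per GOARCH
//		// ... register sizes, zero/move thresholds, etc.
//		arch.SSAMarkMoves = ssaMarkMoves // run once per block before its values
//		arch.SSAGenValue = ssaGenValue   // lowers one ssa.Value to obj.Progs
//		arch.SSAGenBlock = ssaGenBlock   // emits block-ending control flow
//	}
//
// The gc package's code generator then walks each function block by block,
// calling SSAGenValue for every value in the block and SSAGenBlock once at
// the end of the block, with the next block in layout order passed as `next`
// so unnecessary jumps can be skipped.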