github.com/mh-cbon/go@v0.0.0-20160603070303-9e112a3fe4c0/src/cmd/compile/internal/s390x/ggen.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s390x

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"fmt"
)

// clearLoopCutoff is the (somewhat arbitrary) value above which it is better
// to have a loop of clear instructions (e.g. XCs) rather than just generating
// multiple instructions (i.e. loop unrolling).
// Must be between 256 and 4096.
const clearLoopCutoff = 1024

func defframe(ptxt *obj.Prog) {
	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.ArgWidth(), int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi

	// iterate through declarations - they are sorted in decreasing xoffset order.
	for _, n := range gc.Curfn.Func.Dcl {
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatalf("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatalf("var %v has size %d offset %d", gc.Nconv(n, gc.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}

		if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthreg) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi)
}

// zerorange clears the stack in the given range.
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}

	// Adjust the frame to account for LR.
	frame += gc.Ctxt.FixedFrameSize()
	offset := frame + lo
	reg := int16(s390x.REGSP)

	// If the offset cannot fit in a 12-bit unsigned displacement then we
	// need to create a copy of the stack pointer that we can adjust.
	// We also need to do this if we are going to loop.
	if offset < 0 || offset > 4096-clearLoopCutoff || cnt > clearLoopCutoff {
		p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, offset, obj.TYPE_REG, s390x.REGRT1, 0)
		p.Reg = int16(s390x.REGSP)
		reg = s390x.REGRT1
		offset = 0
	}

	// Generate a loop of large clears.
	if cnt > clearLoopCutoff {
		n := cnt - (cnt % 256)
		end := int16(s390x.REGRT2)
		p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, offset+n, obj.TYPE_REG, end, 0)
		p.Reg = reg
		p = appendpp(p, s390x.AXC, obj.TYPE_MEM, reg, offset, obj.TYPE_MEM, reg, offset)
		p.From3 = new(obj.Addr)
		p.From3.Type = obj.TYPE_CONST
		p.From3.Offset = 256
		pl := p
		p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, 256, obj.TYPE_REG, reg, 0)
		p = appendpp(p, s390x.ACMP, obj.TYPE_REG, reg, 0, obj.TYPE_REG, end, 0)
		p = appendpp(p, s390x.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
		gc.Patch(p, pl)

		cnt -= n
	}
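
	// A sketch of the sequence emitted by the loop above (REGRT1 holds
	// the base pointer, REGRT2 the end pointer; an XC of a range with
	// itself zeroes it, since x XOR x == 0):
	//
	//	ADD	$n, REGRT1, REGRT2
	// loop:
	//	XC	$256, 0(REGRT1), 0(REGRT1)
	//	ADD	$256, REGRT1
	//	CMP	REGRT1, REGRT2
	//	BNE	loop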

	// Generate remaining clear instructions without a loop.
	for cnt > 0 {
		n := cnt

		// Can clear at most 256 bytes per instruction.
		if n > 256 {
			n = 256
		}

		switch n {
		// Handle very small clears with move instructions.
		case 8, 4, 2, 1:
			ins := s390x.AMOVB
			switch n {
			case 8:
				ins = s390x.AMOVD
			case 4:
				ins = s390x.AMOVW
			case 2:
				ins = s390x.AMOVH
			}
			p = appendpp(p, ins, obj.TYPE_CONST, 0, 0, obj.TYPE_MEM, reg, offset)

		// Handle clears that would require multiple move instructions with XC.
		default:
			p = appendpp(p, s390x.AXC, obj.TYPE_MEM, reg, offset, obj.TYPE_MEM, reg, offset)
			p.From3 = new(obj.Addr)
			p.From3.Type = obj.TYPE_CONST
			p.From3.Offset = n
		}

		cnt -= n
		offset += n
	}

	return p
}

func appendpp(p *obj.Prog, as obj.As, ftype obj.AddrType, freg int16, foffset int64, ttype obj.AddrType, treg int16, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = as
	q.Lineno = p.Lineno
	q.From.Type = ftype
	q.From.Reg = freg
	q.From.Offset = foffset
	q.To.Type = ttype
	q.To.Reg = treg
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

func ginsnop() {
	var reg gc.Node
	gc.Nodreg(&reg, gc.Types[gc.TINT], s390x.REG_R0)
	gins(s390x.AOR, &reg, &reg)
}

var panicdiv *gc.Node

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will generate undefined result.
	// Also need to explicitly trap on division by zero;
	// the hardware will silently generate undefined result.
	// DIVW will leave an unpredictable result in the upper 32 bits,
	// so always use DIVD/DIVDU.
	t := nl.Type

	t0 := t
	check := 0
	if t.IsSigned() {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && nl.Int64() != -(1<<uint64(t.Width*8-1)) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int64() != -1 {
			check = 0
		}
	}

	if t.Width < 8 {
		if t.IsSigned() {
			t = gc.Types[gc.TINT64]
		} else {
			t = gc.Types[gc.TUINT64]
		}
		check = 0
	}

	a := optoas(gc.ODIV, t)

	var tl gc.Node
	gc.Regalloc(&tl, t0, nil)
	var tr gc.Node
	gc.Regalloc(&tr, t0, nil)
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &tl)
		gc.Cgen(nr, &tr)
	} else {
		gc.Cgen(nr, &tr)
		gc.Cgen(nl, &tl)
	}

	if t != t0 {
		// Convert
		tl2 := tl

		tr2 := tr
		tl.Type = t
		tr.Type = t
		gmove(&tl2, &tl)
		gmove(&tr2, &tr)
	}

	// Handle divide-by-zero panic.
	p1 := gins(optoas(gc.OCMP, t), &tr, nil)

	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = s390x.REGZERO
	p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
	if panicdiv == nil {
		panicdiv = gc.Sysfunc("panicdivide")
	}
	gc.Ginscall(panicdiv, -1)
	gc.Patch(p1, gc.Pc)

	var p2 *obj.Prog
	if check != 0 {
		var nm1 gc.Node
		gc.Nodconst(&nm1, t, -1)
		gins(optoas(gc.OCMP, t), &tr, &nm1)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &tl)

			gmove(&tl, res)
		} else {
			// a % (-1) is 0.
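			// For example, int64(-1 << 63) % -1 must
			// evaluate to 0; handing it to the divide
			// instruction would give the same undefined
			// result as the quotient case above.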
			var nz gc.Node
			gc.Nodconst(&nz, t, 0)

			gmove(&nz, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	p1 = gins(a, &tr, &tl)
	if op == gc.ODIV {
		gc.Regfree(&tr)
		gmove(&tl, res)
	} else {
		// A%B = A-(A/B*B)
		var tm gc.Node
		gc.Regalloc(&tm, t, nil)

		// patch div to use the 3-register form
		// TODO(minux): add gins3?
		p1.Reg = p1.To.Reg

		p1.To.Reg = tm.Reg
		gins(optoas(gc.OMUL, t), &tr, &tm)
		gc.Regfree(&tr)
		gins(optoas(gc.OSUB, t), &tm, &tl)
		gc.Regfree(&tm)
		gmove(&tl, res)
	}

	gc.Regfree(&tl)
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
}

/*
 * generate high multiply:
 *	res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	t := nl.Type
	w := int(t.Width) * 8
	var n1 gc.Node
	gc.Cgenr(nl, &n1, res)
	var n2 gc.Node
	gc.Cgenr(nr, &n2, nil)
	switch gc.Simtype[t.Etype] {
	case gc.TINT8,
		gc.TINT16,
		gc.TINT32:
		gins(optoas(gc.OMUL, t), &n2, &n1)
		p := gins(s390x.ASRAD, nil, &n1)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(w)

	case gc.TUINT8,
		gc.TUINT16,
		gc.TUINT32:
		gins(optoas(gc.OMUL, t), &n2, &n1)
		p := gins(s390x.ASRD, nil, &n1)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(w)

	case gc.TINT64:
		gins(s390x.AMULHD, &n2, &n1)

	case gc.TUINT64:
		gins(s390x.AMULHDU, &n2, &n1)

	default:
		gc.Fatalf("cgen_hmul %v", t)
	}

	gc.Cgen(&n1, res)
	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int64())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
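	// For example, a count that arrives as a uint32 or uint64 is
	// compared against the width bound below in its own type; only
	// counts narrower than 32 bits are widened (to uint32) first.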
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	var n1 gc.Node
	gc.Regalloc(&n1, nr.Type, nil) // to hold the shift count
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear the high bits of the shift count

	var n2 gc.Node
	gc.Regalloc(&n2, nl.Type, res)

	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, 1)
		if op == gc.ORSH && nl.Type.IsSigned() {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

// clearfat clears (i.e. replaces with zeros) the value pointed to by nl.
func clearfat(nl *gc.Node) {
	if gc.Debug['g'] != 0 {
		fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width)
	}

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	var dst gc.Node
	gc.Regalloc(&dst, gc.Types[gc.Tptr], nil)
	gc.Agen(nl, &dst)

	var boff int64
	w := nl.Type.Width
	if w > clearLoopCutoff {
		// Generate a loop clearing 256 bytes per iteration using XCs.
		var end gc.Node
		gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
		p := gins(s390x.AMOVD, &dst, &end)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w - (w % 256)

		p = gins(s390x.AXC, &dst, &dst)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = 0
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 0
		p.From3 = new(obj.Addr)
		p.From3.Offset = 256
		p.From3.Type = obj.TYPE_CONST
		pl := p

		ginscon(s390x.AADD, 256, &dst)
		gins(s390x.ACMP, &dst, &end)
		gc.Patch(gc.Gbranch(s390x.ABNE, nil, 0), pl)
		gc.Regfree(&end)
		w = w % 256
	}

	// Generate instructions to clear the remaining memory.
	for w > 0 {
		n := w

		// Can clear at most 256 bytes per instruction.
		if n > 256 {
			n = 256
		}

		switch n {
		// Handle very small clears using moves.
		case 8, 4, 2, 1:
			ins := s390x.AMOVB
			switch n {
			case 8:
				ins = s390x.AMOVD
			case 4:
				ins = s390x.AMOVW
			case 2:
				ins = s390x.AMOVH
			}
			p := gins(ins, nil, &dst)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = boff

		// Handle clears that would require multiple moves with an XC.
		default:
			p := gins(s390x.AXC, &dst, &dst)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = boff
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = boff
			p.From3 = new(obj.Addr)
			p.From3.Offset = n
			p.From3.Type = obj.TYPE_CONST
		}

		boff += n
		w -= n
	}

	gc.Regfree(&dst)
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
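// Each CHECKNIL becomes a compare-and-branch over a faulting store:
//
//	CMPBNE	arg, $0, 2(PC)
//	MOVD	R0, 0(R0)	// crash by writing to address 0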
func expandchecks(firstp *obj.Prog) {
	for p := firstp; p != nil; p = p.Link {
		if gc.Debug_checknil != 0 && gc.Ctxt.Debugvlog != 0 {
			fmt.Printf("expandchecks: %v\n", p)
		}
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p.Lineno == 1 in generated wrappers
			gc.Warnl(p.Lineno, "generated nil check")
		}
		if p.From.Type != obj.TYPE_REG {
			gc.Fatalf("invalid nil check %v\n", p)
		}

		// check is
		//	CMPBNE arg, $0, 2(PC) [likely]
		//	MOVD R0, 0(R0)
		p1 := gc.Ctxt.NewProg()

		gc.Clearp(p1)
		p1.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p1.Pc = 9999
		p.As = s390x.ACMPBNE
		p.From3 = new(obj.Addr)
		p.From3.Type = obj.TYPE_CONST
		p.From3.Offset = 0

		p.To.Type = obj.TYPE_BRANCH
		p.To.Val = p1.Link

		// crash by write to memory address 0.
		p1.As = s390x.AMOVD

		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = s390x.REGZERO
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = s390x.REGZERO
		p1.To.Offset = 0
	}
}

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Nodreg(&n1, res.Type, s390x.REGG)
	gmove(&n1, res)
}