github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/cmd/9g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 static Prog *appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset); 13 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi); 14 15 void 16 defframe(Prog *ptxt) 17 { 18 uint32 frame; 19 Prog *p; 20 vlong hi, lo; 21 NodeList *l; 22 Node *n; 23 24 // fill in argument size 25 ptxt->to.offset = rnd(curfn->type->argwid, widthptr); 26 27 // fill in final stack size 28 ptxt->to.offset <<= 32; 29 frame = rnd(stksize+maxarg, widthreg); 30 ptxt->to.offset |= frame; 31 32 // insert code to zero ambiguously live variables 33 // so that the garbage collector only sees initialized values 34 // when it looks for pointers. 35 p = ptxt; 36 lo = hi = 0; 37 // iterate through declarations - they are sorted in decreasing xoffset order. 38 for(l=curfn->dcl; l != nil; l = l->next) { 39 n = l->n; 40 if(!n->needzero) 41 continue; 42 if(n->class != PAUTO) 43 fatal("needzero class %d", n->class); 44 if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) 45 fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); 46 47 if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) { 48 // merge with range we already have 49 lo = n->xoffset; 50 continue; 51 } 52 // zero old range 53 p = zerorange(p, frame, lo, hi); 54 55 // set new range 56 hi = n->xoffset + n->type->width; 57 lo = n->xoffset; 58 } 59 // zero final range 60 zerorange(p, frame, lo, hi); 61 } 62 63 static Prog* 64 zerorange(Prog *p, vlong frame, vlong lo, vlong hi) 65 { 66 vlong cnt, i; 67 Prog *p1; 68 Node *f; 69 70 cnt = hi - lo; 71 if(cnt == 0) 72 return p; 73 if(cnt < 4*widthptr) { 74 for(i = 0; i < cnt; i += widthptr) 75 p = appendpp(p, AMOVD, D_REG, REGZERO, 0, D_OREG, REGSP, 8+frame+lo+i); 76 } else if(cnt <= 128*widthptr) { 77 p = appendpp(p, AADD, D_CONST, NREG, 8+frame+lo-8, D_REG, REGRT1, 0); 78 p->reg = REGSP; 79 p = appendpp(p, ADUFFZERO, D_NONE, NREG, 0, D_OREG, NREG, 0); 80 f = sysfunc("duffzero"); 81 naddr(f, &p->to, 1); 82 afunclit(&p->to, f); 83 p->to.offset = 4*(128-cnt/widthptr); 84 } else { 85 p = appendpp(p, AMOVD, D_CONST, NREG, 8+frame+lo-8, D_REG, REGTMP, 0); 86 p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT1, 0); 87 p->reg = REGSP; 88 p = appendpp(p, AMOVD, D_CONST, NREG, cnt, D_REG, REGTMP, 0); 89 p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT2, 0); 90 p->reg = REGRT1; 91 p1 = p = appendpp(p, AMOVDU, D_REG, REGZERO, 0, D_OREG, REGRT1, widthptr); 92 p = appendpp(p, ACMP, D_REG, REGRT1, 0, D_REG, REGRT2, 0); 93 p = appendpp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0); 94 patch(p, p1); 95 } 96 return p; 97 } 98 99 static Prog* 100 appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset) 101 { 102 Prog *q; 103 q = mal(sizeof(*q)); 104 clearp(q); 105 q->as = as; 106 q->lineno = p->lineno; 107 q->from.type = ftype; 108 q->from.reg = freg; 109 q->from.offset = foffset; 110 q->to.type = ttype; 111 q->to.reg = treg; 112 q->to.offset = toffset; 113 q->link = p->link; 114 p->link = q; 115 return q; 116 } 117 118 // Sweep the prog list to mark any used nodes. 119 void 120 markautoused(Prog *p) 121 { 122 for (; p; p = p->link) { 123 if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL) 124 continue; 125 126 if (p->from.node) 127 p->from.node->used = 1; 128 129 if (p->to.node) 130 p->to.node->used = 1; 131 } 132 } 133 134 // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 135 void 136 fixautoused(Prog *p) 137 { 138 Prog **lp; 139 140 for (lp=&p; (p=*lp) != P; ) { 141 if (p->as == ATYPE && p->from.node && p->from.name == D_AUTO && !p->from.node->used) { 142 *lp = p->link; 143 continue; 144 } 145 if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) { 146 // Cannot remove VARDEF instruction, because - unlike TYPE handled above - 147 // VARDEFs are interspersed with other code, and a jump might be using the 148 // VARDEF as a target. Replace with a no-op instead. A later pass will remove 149 // the no-ops. 150 p->to.type = D_NONE; 151 p->to.node = N; 152 p->as = ANOP; 153 continue; 154 } 155 if (p->from.name == D_AUTO && p->from.node) 156 p->from.offset += p->from.node->stkdelta; 157 158 if (p->to.name == D_AUTO && p->to.node) 159 p->to.offset += p->to.node->stkdelta; 160 161 lp = &p->link; 162 } 163 } 164 165 /* 166 * generate: BL reg, f 167 * where both reg and f are registers. 168 * On power, f must be moved to CTR first. 169 */ 170 static void 171 ginsBL(Node *reg, Node *f) 172 { 173 Prog *p; 174 p = gins(AMOVD, f, N); 175 p->to.type = D_SPR; 176 p->to.offset = D_CTR; 177 p = gins(ABL, reg, N); 178 p->to.type = D_SPR; 179 p->to.offset = D_CTR; 180 } 181 182 /* 183 * generate: 184 * call f 185 * proc=-1 normal call but no return 186 * proc=0 normal call 187 * proc=1 goroutine run in new proc 188 * proc=2 defer call save away stack 189 * proc=3 normal call to C pointer (not Go func value) 190 */ 191 void 192 ginscall(Node *f, int proc) 193 { 194 Prog *p; 195 Node reg, con, reg2; 196 Node r1; 197 int32 extra; 198 199 if(f->type != T) { 200 extra = 0; 201 if(proc == 1 || proc == 2) 202 extra = 2 * widthptr; 203 setmaxarg(f->type, extra); 204 } 205 206 switch(proc) { 207 default: 208 fatal("ginscall: bad proc %d", proc); 209 break; 210 211 case 0: // normal call 212 case -1: // normal call but no return 213 if(f->op == ONAME && f->class == PFUNC) { 214 if(f == deferreturn) { 215 // Deferred calls will appear to be returning to 216 // the CALL deferreturn(SB) that we are about to emit. 217 // However, the stack trace code will show the line 218 // of the instruction byte before the return PC. 219 // To avoid that being an unrelated instruction, 220 // insert a ppc64 NOP that we will have the right line number. 221 // The ppc64 NOP is really or r0, r0, r0; use that description 222 // because the NOP pseudo-instruction would be removed by 223 // the linker. 224 nodreg(®, types[TINT], D_R0); 225 gins(AOR, ®, ®); 226 } 227 p = gins(ABL, N, f); 228 afunclit(&p->to, f); 229 if(proc == -1 || noreturn(p)) 230 gins(AUNDEF, N, N); 231 break; 232 } 233 nodreg(®, types[tptr], D_R0+REGENV); 234 nodreg(&r1, types[tptr], D_R0+3); 235 gmove(f, ®); 236 reg.op = OINDREG; 237 gmove(®, &r1); 238 reg.op = OREGISTER; 239 ginsBL(®, &r1); 240 break; 241 242 case 3: // normal call of c function pointer 243 ginsBL(N, f); 244 break; 245 246 case 1: // call in new proc (go) 247 case 2: // deferred call (defer) 248 nodconst(&con, types[TINT64], argsize(f->type)); 249 nodreg(®, types[TINT64], D_R0+3); 250 nodreg(®2, types[TINT64], D_R0+4); 251 gmove(f, ®); 252 253 gmove(&con, ®2); 254 p = gins(AMOVW, ®2, N); 255 p->to.type = D_OREG; 256 p->to.reg = REGSP; 257 p->to.offset = 8; 258 259 p = gins(AMOVD, ®, N); 260 p->to.type = D_OREG; 261 p->to.reg = REGSP; 262 p->to.offset = 16; 263 264 if(proc == 1) 265 ginscall(newproc, 0); 266 else { 267 if(!hasdefer) 268 fatal("hasdefer=0 but has defer"); 269 ginscall(deferproc, 0); 270 } 271 272 if(proc == 2) { 273 nodreg(®, types[TINT64], D_R0+3); 274 p = gins(ACMP, ®, N); 275 p->to.type = D_REG; 276 p->to.reg = D_R0; 277 p = gbranch(ABEQ, T, +1); 278 cgen_ret(N); 279 patch(p, pc); 280 } 281 break; 282 } 283 } 284 285 /* 286 * n is call to interface method. 287 * generate res = n. 288 */ 289 void 290 cgen_callinter(Node *n, Node *res, int proc) 291 { 292 Node *i, *f; 293 Node tmpi, nodi, nodo, nodr, nodsp; 294 Prog *p; 295 296 i = n->left; 297 if(i->op != ODOTINTER) 298 fatal("cgen_callinter: not ODOTINTER %O", i->op); 299 300 f = i->right; // field 301 if(f->op != ONAME) 302 fatal("cgen_callinter: not ONAME %O", f->op); 303 304 i = i->left; // interface 305 306 if(!i->addable) { 307 tempname(&tmpi, i->type); 308 cgen(i, &tmpi); 309 i = &tmpi; 310 } 311 312 genlist(n->list); // assign the args 313 314 // i is now addable, prepare an indirected 315 // register to hold its address. 316 igen(i, &nodi, res); // REG = &inter 317 318 nodindreg(&nodsp, types[tptr], D_R0+REGSP); 319 nodsp.xoffset = widthptr; 320 if(proc != 0) 321 nodsp.xoffset += 2 * widthptr; // leave room for size & fn 322 nodi.type = types[tptr]; 323 nodi.xoffset += widthptr; 324 cgen(&nodi, &nodsp); // {8 or 24}(SP) = 8(REG) -- i.data 325 326 regalloc(&nodo, types[tptr], res); 327 nodi.type = types[tptr]; 328 nodi.xoffset -= widthptr; 329 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 330 regfree(&nodi); 331 332 regalloc(&nodr, types[tptr], &nodo); 333 if(n->left->xoffset == BADWIDTH) 334 fatal("cgen_callinter: badwidth"); 335 cgen_checknil(&nodo); // in case offset is huge 336 nodo.op = OINDREG; 337 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 338 if(proc == 0) { 339 // plain call: use direct c function pointer - more efficient 340 cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f] 341 proc = 3; 342 } else { 343 // go/defer. generate go func value. 344 p = gins(AMOVD, &nodo, &nodr); // REG = &(32+offset(REG)) -- i.tab->fun[f] 345 p->from.type = D_CONST; 346 } 347 348 nodr.type = n->left->type; 349 ginscall(&nodr, proc); 350 351 regfree(&nodr); 352 regfree(&nodo); 353 } 354 355 /* 356 * generate function call; 357 * proc=0 normal call 358 * proc=1 goroutine run in new proc 359 * proc=2 defer call save away stack 360 */ 361 void 362 cgen_call(Node *n, int proc) 363 { 364 Type *t; 365 Node nod, afun; 366 367 if(n == N) 368 return; 369 370 if(n->left->ullman >= UINF) { 371 // if name involves a fn call 372 // precompute the address of the fn 373 tempname(&afun, types[tptr]); 374 cgen(n->left, &afun); 375 } 376 377 genlist(n->list); // assign the args 378 t = n->left->type; 379 380 // call tempname pointer 381 if(n->left->ullman >= UINF) { 382 regalloc(&nod, types[tptr], N); 383 cgen_as(&nod, &afun); 384 nod.type = t; 385 ginscall(&nod, proc); 386 regfree(&nod); 387 return; 388 } 389 390 // call pointer 391 if(n->left->op != ONAME || n->left->class != PFUNC) { 392 regalloc(&nod, types[tptr], N); 393 cgen_as(&nod, n->left); 394 nod.type = t; 395 ginscall(&nod, proc); 396 regfree(&nod); 397 return; 398 } 399 400 // call direct 401 n->left->method = 1; 402 ginscall(n->left, proc); 403 } 404 405 /* 406 * call to n has already been generated. 407 * generate: 408 * res = return value from call. 409 */ 410 void 411 cgen_callret(Node *n, Node *res) 412 { 413 Node nod; 414 Type *fp, *t; 415 Iter flist; 416 417 t = n->left->type; 418 if(t->etype == TPTR32 || t->etype == TPTR64) 419 t = t->type; 420 421 fp = structfirst(&flist, getoutarg(t)); 422 if(fp == T) 423 fatal("cgen_callret: nil"); 424 425 memset(&nod, 0, sizeof(nod)); 426 nod.op = OINDREG; 427 nod.val.u.reg = D_R0+REGSP; 428 nod.addable = 1; 429 430 nod.xoffset = fp->width + widthptr; // +widthptr: saved LR at 0(R1) 431 nod.type = fp->type; 432 cgen_as(res, &nod); 433 } 434 435 /* 436 * call to n has already been generated. 437 * generate: 438 * res = &return value from call. 439 */ 440 void 441 cgen_aret(Node *n, Node *res) 442 { 443 Node nod1, nod2; 444 Type *fp, *t; 445 Iter flist; 446 447 t = n->left->type; 448 if(isptr[t->etype]) 449 t = t->type; 450 451 fp = structfirst(&flist, getoutarg(t)); 452 if(fp == T) 453 fatal("cgen_aret: nil"); 454 455 memset(&nod1, 0, sizeof(nod1)); 456 nod1.op = OINDREG; 457 nod1.val.u.reg = D_R0 + REGSP; 458 nod1.addable = 1; 459 460 nod1.xoffset = fp->width + widthptr; // +widthptr: saved lr at 0(SP) 461 nod1.type = fp->type; 462 463 if(res->op != OREGISTER) { 464 regalloc(&nod2, types[tptr], res); 465 agen(&nod1, &nod2); 466 gins(AMOVD, &nod2, res); 467 regfree(&nod2); 468 } else 469 agen(&nod1, res); 470 } 471 472 /* 473 * generate return. 474 * n->left is assignments to return values. 475 */ 476 void 477 cgen_ret(Node *n) 478 { 479 Prog *p; 480 481 if(n != N) 482 genlist(n->list); // copy out args 483 if(hasdefer) 484 ginscall(deferreturn, 0); 485 genlist(curfn->exit); 486 p = gins(ARET, N, N); 487 if(n != N && n->op == ORETJMP) { 488 p->to.name = D_EXTERN; 489 p->to.type = D_CONST; 490 p->to.sym = linksym(n->left->sym); 491 } 492 } 493 494 void 495 cgen_asop(Node *n) 496 { 497 USED(n); 498 fatal("cgen_asop"); // no longer used 499 } 500 501 int 502 samereg(Node *a, Node *b) 503 { 504 if(a == N || b == N) 505 return 0; 506 if(a->op != OREGISTER) 507 return 0; 508 if(b->op != OREGISTER) 509 return 0; 510 if(a->val.u.reg != b->val.u.reg) 511 return 0; 512 return 1; 513 } 514 515 /* 516 * generate division. 517 * generates one of: 518 * res = nl / nr 519 * res = nl % nr 520 * according to op. 521 */ 522 void 523 dodiv(int op, Node *nl, Node *nr, Node *res) 524 { 525 int a, check; 526 Type *t, *t0; 527 Node tl, tr, tl2, tr2, nm1, nz, tm; 528 Prog *p1, *p2; 529 530 // Have to be careful about handling 531 // most negative int divided by -1 correctly. 532 // The hardware will generate undefined result. 533 // Also need to explicitly trap on division on zero, 534 // the hardware will silently generate undefined result. 535 // DIVW will leave unpredicable result in higher 32-bit, 536 // so always use DIVD/DIVDU. 537 t = nl->type; 538 t0 = t; 539 check = 0; 540 if(issigned[t->etype]) { 541 check = 1; 542 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1))) 543 check = 0; 544 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 545 check = 0; 546 } 547 if(t->width < 8) { 548 if(issigned[t->etype]) 549 t = types[TINT64]; 550 else 551 t = types[TUINT64]; 552 check = 0; 553 } 554 555 a = optoas(ODIV, t); 556 557 regalloc(&tl, t0, N); 558 regalloc(&tr, t0, N); 559 if(nl->ullman >= nr->ullman) { 560 cgen(nl, &tl); 561 cgen(nr, &tr); 562 } else { 563 cgen(nr, &tr); 564 cgen(nl, &tl); 565 } 566 if(t != t0) { 567 // Convert 568 tl2 = tl; 569 tr2 = tr; 570 tl.type = t; 571 tr.type = t; 572 gmove(&tl2, &tl); 573 gmove(&tr2, &tr); 574 } 575 576 // Handle divide-by-zero panic. 577 p1 = gins(optoas(OCMP, t), &tr, N); 578 p1->to.type = D_REG; 579 p1->to.reg = REGZERO; 580 p1 = gbranch(optoas(ONE, t), T, +1); 581 if(panicdiv == N) 582 panicdiv = sysfunc("panicdivide"); 583 ginscall(panicdiv, -1); 584 patch(p1, pc); 585 586 if(check) { 587 nodconst(&nm1, t, -1); 588 gins(optoas(OCMP, t), &tr, &nm1); 589 p1 = gbranch(optoas(ONE, t), T, +1); 590 if(op == ODIV) { 591 // a / (-1) is -a. 592 gins(optoas(OMINUS, t), N, &tl); 593 gmove(&tl, res); 594 } else { 595 // a % (-1) is 0. 596 nodconst(&nz, t, 0); 597 gmove(&nz, res); 598 } 599 p2 = gbranch(AJMP, T, 0); 600 patch(p1, pc); 601 } 602 p1 = gins(a, &tr, &tl); 603 if(op == ODIV) { 604 regfree(&tr); 605 gmove(&tl, res); 606 } else { 607 // A%B = A-(A/B*B) 608 regalloc(&tm, t, N); 609 // patch div to use the 3 register form 610 // TODO(minux): add gins3? 611 p1->reg = p1->to.reg; 612 p1->to.reg = tm.val.u.reg; 613 gins(optoas(OMUL, t), &tr, &tm); 614 regfree(&tr); 615 gins(optoas(OSUB, t), &tm, &tl); 616 regfree(&tm); 617 gmove(&tl, res); 618 } 619 regfree(&tl); 620 if(check) 621 patch(p2, pc); 622 } 623 624 /* 625 * generate division according to op, one of: 626 * res = nl / nr 627 * res = nl % nr 628 */ 629 void 630 cgen_div(int op, Node *nl, Node *nr, Node *res) 631 { 632 Node n1, n2, n3; 633 int w, a; 634 Magic m; 635 636 // TODO(minux): enable division by magic multiply (also need to fix longmod below) 637 //if(nr->op != OLITERAL) 638 goto longdiv; 639 w = nl->type->width*8; 640 641 // Front end handled 32-bit division. We only need to handle 64-bit. 642 // try to do division by multiply by (2^w)/d 643 // see hacker's delight chapter 10 644 switch(simtype[nl->type->etype]) { 645 default: 646 goto longdiv; 647 648 case TUINT64: 649 m.w = w; 650 m.ud = mpgetfix(nr->val.u.xval); 651 umagic(&m); 652 if(m.bad) 653 break; 654 if(op == OMOD) 655 goto longmod; 656 657 cgenr(nl, &n1, N); 658 nodconst(&n2, nl->type, m.um); 659 regalloc(&n3, nl->type, res); 660 cgen_hmul(&n1, &n2, &n3); 661 662 if(m.ua) { 663 // need to add numerator accounting for overflow 664 gins(optoas(OADD, nl->type), &n1, &n3); 665 nodconst(&n2, nl->type, 1); 666 gins(optoas(ORROTC, nl->type), &n2, &n3); 667 nodconst(&n2, nl->type, m.s-1); 668 gins(optoas(ORSH, nl->type), &n2, &n3); 669 } else { 670 nodconst(&n2, nl->type, m.s); 671 gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx 672 } 673 674 gmove(&n3, res); 675 regfree(&n1); 676 regfree(&n3); 677 return; 678 679 case TINT64: 680 m.w = w; 681 m.sd = mpgetfix(nr->val.u.xval); 682 smagic(&m); 683 if(m.bad) 684 break; 685 if(op == OMOD) 686 goto longmod; 687 688 cgenr(nl, &n1, res); 689 nodconst(&n2, nl->type, m.sm); 690 regalloc(&n3, nl->type, N); 691 cgen_hmul(&n1, &n2, &n3); 692 693 if(m.sm < 0) { 694 // need to add numerator 695 gins(optoas(OADD, nl->type), &n1, &n3); 696 } 697 698 nodconst(&n2, nl->type, m.s); 699 gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3 700 701 nodconst(&n2, nl->type, w-1); 702 gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg 703 gins(optoas(OSUB, nl->type), &n1, &n3); // added 704 705 if(m.sd < 0) { 706 // this could probably be removed 707 // by factoring it into the multiplier 708 gins(optoas(OMINUS, nl->type), N, &n3); 709 } 710 711 gmove(&n3, res); 712 regfree(&n1); 713 regfree(&n3); 714 return; 715 } 716 goto longdiv; 717 718 longdiv: 719 // division and mod using (slow) hardware instruction 720 dodiv(op, nl, nr, res); 721 return; 722 723 longmod: 724 // mod using formula A%B = A-(A/B*B) but 725 // we know that there is a fast algorithm for A/B 726 regalloc(&n1, nl->type, res); 727 cgen(nl, &n1); 728 regalloc(&n2, nl->type, N); 729 cgen_div(ODIV, &n1, nr, &n2); 730 a = optoas(OMUL, nl->type); 731 if(w == 8) { 732 // use 2-operand 16-bit multiply 733 // because there is no 2-operand 8-bit multiply 734 //a = AIMULW; 735 } 736 if(!smallintconst(nr)) { 737 regalloc(&n3, nl->type, N); 738 cgen(nr, &n3); 739 gins(a, &n3, &n2); 740 regfree(&n3); 741 } else 742 gins(a, nr, &n2); 743 gins(optoas(OSUB, nl->type), &n2, &n1); 744 gmove(&n1, res); 745 regfree(&n1); 746 regfree(&n2); 747 } 748 749 /* 750 * generate high multiply: 751 * res = (nl*nr) >> width 752 */ 753 void 754 cgen_hmul(Node *nl, Node *nr, Node *res) 755 { 756 int w; 757 Node n1, n2, *tmp; 758 Type *t; 759 Prog *p; 760 761 // largest ullman on left. 762 if(nl->ullman < nr->ullman) { 763 tmp = nl; 764 nl = nr; 765 nr = tmp; 766 } 767 t = nl->type; 768 w = t->width * 8; 769 cgenr(nl, &n1, res); 770 cgenr(nr, &n2, N); 771 switch(simtype[t->etype]) { 772 case TINT8: 773 case TINT16: 774 case TINT32: 775 gins(optoas(OMUL, t), &n2, &n1); 776 p = gins(ASRAD, N, &n1); 777 p->from.type = D_CONST; 778 p->from.offset = w; 779 break; 780 case TUINT8: 781 case TUINT16: 782 case TUINT32: 783 gins(optoas(OMUL, t), &n2, &n1); 784 p = gins(ASRD, N, &n1); 785 p->from.type = D_CONST; 786 p->from.offset = w; 787 break; 788 case TINT64: 789 case TUINT64: 790 if(issigned[t->etype]) 791 p = gins(AMULHD, &n2, &n1); 792 else 793 p = gins(AMULHDU, &n2, &n1); 794 break; 795 default: 796 fatal("cgen_hmul %T", t); 797 break; 798 } 799 cgen(&n1, res); 800 regfree(&n1); 801 regfree(&n2); 802 } 803 804 /* 805 * generate shift according to op, one of: 806 * res = nl << nr 807 * res = nl >> nr 808 */ 809 void 810 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 811 { 812 Node n1, n2, n3, n4, n5; 813 int a; 814 Prog *p1; 815 uvlong sc; 816 Type *tcount; 817 818 a = optoas(op, nl->type); 819 820 if(nr->op == OLITERAL) { 821 regalloc(&n1, nl->type, res); 822 cgen(nl, &n1); 823 sc = mpgetfix(nr->val.u.xval); 824 if(sc >= nl->type->width*8) { 825 // large shift gets 2 shifts by width-1 826 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 827 gins(a, &n3, &n1); 828 gins(a, &n3, &n1); 829 } else 830 gins(a, nr, &n1); 831 gmove(&n1, res); 832 regfree(&n1); 833 goto ret; 834 } 835 836 if(nl->ullman >= UINF) { 837 tempname(&n4, nl->type); 838 cgen(nl, &n4); 839 nl = &n4; 840 } 841 if(nr->ullman >= UINF) { 842 tempname(&n5, nr->type); 843 cgen(nr, &n5); 844 nr = &n5; 845 } 846 847 // Allow either uint32 or uint64 as shift type, 848 // to avoid unnecessary conversion from uint32 to uint64 849 // just to do the comparison. 850 tcount = types[simtype[nr->type->etype]]; 851 if(tcount->etype < TUINT32) 852 tcount = types[TUINT32]; 853 854 regalloc(&n1, nr->type, N); // to hold the shift type in CX 855 regalloc(&n3, tcount, &n1); // to clear high bits of CX 856 857 regalloc(&n2, nl->type, res); 858 if(nl->ullman >= nr->ullman) { 859 cgen(nl, &n2); 860 cgen(nr, &n1); 861 gmove(&n1, &n3); 862 } else { 863 cgen(nr, &n1); 864 gmove(&n1, &n3); 865 cgen(nl, &n2); 866 } 867 regfree(&n3); 868 869 // test and fix up large shifts 870 if(!bounded) { 871 nodconst(&n3, tcount, nl->type->width*8); 872 gins(optoas(OCMP, tcount), &n1, &n3); 873 p1 = gbranch(optoas(OLT, tcount), T, +1); 874 if(op == ORSH && issigned[nl->type->etype]) { 875 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 876 gins(a, &n3, &n2); 877 } else { 878 nodconst(&n3, nl->type, 0); 879 gmove(&n3, &n2); 880 } 881 patch(p1, pc); 882 } 883 884 gins(a, &n1, &n2); 885 886 gmove(&n2, res); 887 888 regfree(&n1); 889 regfree(&n2); 890 891 ret: 892 ; 893 } 894 895 void 896 clearfat(Node *nl) 897 { 898 uint64 w, c, q, t, boff; 899 Node dst, end, r0, *f; 900 Prog *p, *pl; 901 902 /* clear a fat object */ 903 if(debug['g']) { 904 print("clearfat %N (%T, size: %lld)\n", nl, nl->type, nl->type->width); 905 } 906 907 w = nl->type->width; 908 // Avoid taking the address for simple enough types. 909 //if(componentgen(N, nl)) 910 // return; 911 912 c = w % 8; // bytes 913 q = w / 8; // dwords 914 915 if(reg[REGRT1] > 0) 916 fatal("R%d in use during clearfat", REGRT1); 917 918 nodreg(&r0, types[TUINT64], 0); // r0 is always zero 919 nodreg(&dst, types[tptr], D_R0+REGRT1); 920 reg[REGRT1]++; 921 agen(nl, &dst); 922 923 if(q > 128) { 924 p = gins(ASUB, N, &dst); 925 p->from.type = D_CONST; 926 p->from.offset = 8; 927 928 regalloc(&end, types[tptr], N); 929 p = gins(AMOVD, &dst, &end); 930 p->from.type = D_CONST; 931 p->from.offset = q*8; 932 933 p = gins(AMOVDU, &r0, &dst); 934 p->to.type = D_OREG; 935 p->to.offset = 8; 936 pl = p; 937 938 p = gins(ACMP, &dst, &end); 939 patch(gbranch(ABNE, T, 0), pl); 940 941 regfree(&end); 942 // The loop leaves R3 on the last zeroed dword 943 boff = 8; 944 } else if(q >= 4) { 945 p = gins(ASUB, N, &dst); 946 p->from.type = D_CONST; 947 p->from.offset = 8; 948 f = sysfunc("duffzero"); 949 p = gins(ADUFFZERO, N, f); 950 afunclit(&p->to, f); 951 // 4 and 128 = magic constants: see ../../runtime/asm_ppc64x.s 952 p->to.offset = 4*(128-q); 953 // duffzero leaves R3 on the last zeroed dword 954 boff = 8; 955 } else { 956 for(t = 0; t < q; t++) { 957 p = gins(AMOVD, &r0, &dst); 958 p->to.type = D_OREG; 959 p->to.offset = 8*t; 960 } 961 boff = 8*q; 962 } 963 964 for(t = 0; t < c; t++) { 965 p = gins(AMOVB, &r0, &dst); 966 p->to.type = D_OREG; 967 p->to.offset = t+boff; 968 } 969 reg[REGRT1]--; 970 } 971 972 // Called after regopt and peep have run. 973 // Expand CHECKNIL pseudo-op into actual nil pointer check. 974 void 975 expandchecks(Prog *firstp) 976 { 977 Prog *p, *p1, *p2; 978 979 for(p = firstp; p != P; p = p->link) { 980 if(debug_checknil && ctxt->debugvlog) 981 print("expandchecks: %P\n", p); 982 if(p->as != ACHECKNIL) 983 continue; 984 if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers 985 warnl(p->lineno, "generated nil check"); 986 if(p->from.type != D_REG) 987 fatal("invalid nil check %P\n", p); 988 /* 989 // check is 990 // TD $4, R0, arg (R0 is always zero) 991 // eqv. to: 992 // tdeq r0, arg 993 // NOTE: this needs special runtime support to make SIGTRAP recoverable. 994 reg = p->from.reg; 995 p->as = ATD; 996 p->from = p->to = p->from3 = zprog.from; 997 p->from.type = D_CONST; 998 p->from.offset = 4; 999 p->from.reg = NREG; 1000 p->reg = 0; 1001 p->to.type = D_REG; 1002 p->to.reg = reg; 1003 */ 1004 // check is 1005 // CMP arg, R0 1006 // BNE 2(PC) [likely] 1007 // MOVD R0, 0(R0) 1008 p1 = mal(sizeof *p1); 1009 p2 = mal(sizeof *p2); 1010 clearp(p1); 1011 clearp(p2); 1012 p1->link = p2; 1013 p2->link = p->link; 1014 p->link = p1; 1015 p1->lineno = p->lineno; 1016 p2->lineno = p->lineno; 1017 p1->pc = 9999; 1018 p2->pc = 9999; 1019 p->as = ACMP; 1020 p->to.type = D_REG; 1021 p->to.reg = REGZERO; 1022 p1->as = ABNE; 1023 //p1->from.type = D_CONST; 1024 //p1->from.offset = 1; // likely 1025 p1->to.type = D_BRANCH; 1026 p1->to.u.branch = p2->link; 1027 // crash by write to memory address 0. 1028 p2->as = AMOVD; 1029 p2->from.type = D_REG; 1030 p2->from.reg = 0; 1031 p2->to.type = D_OREG; 1032 p2->to.reg = 0; 1033 p2->to.offset = 0; 1034 } 1035 }