github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/cmd/9g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 static Prog *appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset); 13 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi); 14 15 void 16 defframe(Prog *ptxt) 17 { 18 uint32 frame; 19 Prog *p; 20 vlong hi, lo; 21 NodeList *l; 22 Node *n; 23 24 // fill in argument size 25 ptxt->to.offset = rnd(curfn->type->argwid, widthptr); 26 27 // fill in final stack size 28 ptxt->to.offset <<= 32; 29 frame = rnd(stksize+maxarg, widthreg); 30 ptxt->to.offset |= frame; 31 32 // insert code to zero ambiguously live variables 33 // so that the garbage collector only sees initialized values 34 // when it looks for pointers. 35 p = ptxt; 36 lo = hi = 0; 37 // iterate through declarations - they are sorted in decreasing xoffset order. 38 for(l=curfn->dcl; l != nil; l = l->next) { 39 n = l->n; 40 if(!n->needzero) 41 continue; 42 if(n->class != PAUTO) 43 fatal("needzero class %d", n->class); 44 if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) 45 fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); 46 47 if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) { 48 // merge with range we already have 49 lo = n->xoffset; 50 continue; 51 } 52 // zero old range 53 p = zerorange(p, frame, lo, hi); 54 55 // set new range 56 hi = n->xoffset + n->type->width; 57 lo = n->xoffset; 58 } 59 // zero final range 60 zerorange(p, frame, lo, hi); 61 } 62 63 static Prog* 64 zerorange(Prog *p, vlong frame, vlong lo, vlong hi) 65 { 66 vlong cnt, i; 67 Prog *p1; 68 Node *f; 69 70 cnt = hi - lo; 71 if(cnt == 0) 72 return p; 73 if(cnt < 4*widthptr) { 74 for(i = 0; i < cnt; i += widthptr) 75 p = appendpp(p, AMOVD, D_REG, REGZERO, 0, D_OREG, REGSP, 8+frame+lo+i); 76 } else if(cnt <= 128*widthptr) { 77 p = appendpp(p, AADD, D_CONST, NREG, 8+frame+lo-8, D_REG, REGRT1, 0); 78 p->reg = REGSP; 79 p = appendpp(p, ADUFFZERO, D_NONE, NREG, 0, D_OREG, NREG, 0); 80 f = sysfunc("duffzero"); 81 naddr(f, &p->to, 1); 82 afunclit(&p->to, f); 83 p->to.offset = 4*(128-cnt/widthptr); 84 } else { 85 p = appendpp(p, AMOVD, D_CONST, NREG, 8+frame+lo-8, D_REG, REGTMP, 0); 86 p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT1, 0); 87 p->reg = REGSP; 88 p = appendpp(p, AMOVD, D_CONST, NREG, cnt, D_REG, REGTMP, 0); 89 p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT2, 0); 90 p->reg = REGRT1; 91 p1 = p = appendpp(p, AMOVDU, D_REG, REGZERO, 0, D_OREG, REGRT1, widthptr); 92 p = appendpp(p, ACMP, D_REG, REGRT1, 0, D_REG, REGRT2, 0); 93 p = appendpp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0); 94 patch(p, p1); 95 } 96 return p; 97 } 98 99 static Prog* 100 appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset) 101 { 102 Prog *q; 103 q = mal(sizeof(*q)); 104 clearp(q); 105 q->as = as; 106 q->lineno = p->lineno; 107 q->from.type = ftype; 108 q->from.reg = freg; 109 q->from.offset = foffset; 110 q->to.type = ttype; 111 q->to.reg = treg; 112 q->to.offset = toffset; 113 q->link = p->link; 114 p->link = q; 115 return q; 116 } 117 118 // Sweep the prog list to mark any used nodes. 119 void 120 markautoused(Prog *p) 121 { 122 for (; p; p = p->link) { 123 if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL) 124 continue; 125 126 if (p->from.node) 127 p->from.node->used = 1; 128 129 if (p->to.node) 130 p->to.node->used = 1; 131 } 132 } 133 134 // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 135 void 136 fixautoused(Prog *p) 137 { 138 Prog **lp; 139 140 for (lp=&p; (p=*lp) != P; ) { 141 if (p->as == ATYPE && p->from.node && p->from.name == D_AUTO && !p->from.node->used) { 142 *lp = p->link; 143 continue; 144 } 145 if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) { 146 // Cannot remove VARDEF instruction, because - unlike TYPE handled above - 147 // VARDEFs are interspersed with other code, and a jump might be using the 148 // VARDEF as a target. Replace with a no-op instead. A later pass will remove 149 // the no-ops. 150 p->to.type = D_NONE; 151 p->to.node = N; 152 p->as = ANOP; 153 continue; 154 } 155 if (p->from.name == D_AUTO && p->from.node) 156 p->from.offset += p->from.node->stkdelta; 157 158 if (p->to.name == D_AUTO && p->to.node) 159 p->to.offset += p->to.node->stkdelta; 160 161 lp = &p->link; 162 } 163 } 164 165 /* 166 * generate: BL reg, f 167 * where both reg and f are registers. 168 * On power, f must be moved to CTR first. 169 */ 170 static void 171 ginsBL(Node *reg, Node *f) 172 { 173 Prog *p; 174 p = gins(AMOVD, f, N); 175 p->to.type = D_SPR; 176 p->to.offset = D_CTR; 177 p = gins(ABL, reg, N); 178 p->to.type = D_SPR; 179 p->to.offset = D_CTR; 180 } 181 182 /* 183 * generate: 184 * call f 185 * proc=-1 normal call but no return 186 * proc=0 normal call 187 * proc=1 goroutine run in new proc 188 * proc=2 defer call save away stack 189 * proc=3 normal call to C pointer (not Go func value) 190 */ 191 void 192 ginscall(Node *f, int proc) 193 { 194 Prog *p; 195 Node reg, con, reg2; 196 Node r1; 197 198 if(f->type != T) 199 setmaxarg(f->type); 200 201 switch(proc) { 202 default: 203 fatal("ginscall: bad proc %d", proc); 204 break; 205 206 case 0: // normal call 207 case -1: // normal call but no return 208 if(f->op == ONAME && f->class == PFUNC) { 209 if(f == deferreturn) { 210 // Deferred calls will appear to be returning to 211 // the CALL deferreturn(SB) that we are about to emit. 212 // However, the stack trace code will show the line 213 // of the instruction byte before the return PC. 214 // To avoid that being an unrelated instruction, 215 // insert a ppc64 NOP that we will have the right line number. 216 // The ppc64 NOP is really or r0, r0, r0; use that description 217 // because the NOP pseudo-instruction would be removed by 218 // the linker. 219 nodreg(®, types[TINT], D_R0); 220 gins(AOR, ®, ®); 221 } 222 p = gins(ABL, N, f); 223 afunclit(&p->to, f); 224 if(proc == -1 || noreturn(p)) 225 gins(AUNDEF, N, N); 226 break; 227 } 228 nodreg(®, types[tptr], D_R0+REGENV); 229 nodreg(&r1, types[tptr], D_R0+3); 230 gmove(f, ®); 231 reg.op = OINDREG; 232 gmove(®, &r1); 233 reg.op = OREGISTER; 234 ginsBL(®, &r1); 235 break; 236 237 case 3: // normal call of c function pointer 238 ginsBL(N, f); 239 break; 240 241 case 1: // call in new proc (go) 242 case 2: // deferred call (defer) 243 nodconst(&con, types[TINT64], argsize(f->type)); 244 nodreg(®, types[TINT64], D_R0+3); 245 nodreg(®2, types[TINT64], D_R0+4); 246 gmove(f, ®); 247 248 p = gins(ASUB, N, N); 249 p->from.type = D_CONST; 250 p->from.offset = 3 * 8; 251 p->to.type = D_REG; 252 p->to.reg = REGSP; 253 254 gmove(&con, ®2); 255 p = gins(AMOVW, ®2, N); 256 p->to.type = D_OREG; 257 p->to.reg = REGSP; 258 p->to.offset = 8; 259 260 p = gins(AMOVD, ®, N); 261 p->to.type = D_OREG; 262 p->to.reg = REGSP; 263 p->to.offset = 16; 264 265 if(proc == 1) 266 ginscall(newproc, 0); 267 else { 268 if(!hasdefer) 269 fatal("hasdefer=0 but has defer"); 270 ginscall(deferproc, 0); 271 } 272 273 p = gins(AADD, N, N); 274 p->from.type = D_CONST; 275 p->from.offset = 3 * 8; 276 p->to.type = D_REG; 277 p->to.reg = REGSP; 278 279 if(proc == 2) { 280 nodreg(®, types[TINT64], D_R0+3); 281 p = gins(ACMP, ®, N); 282 p->to.type = D_REG; 283 p->to.reg = D_R0; 284 p = gbranch(ABEQ, T, +1); 285 cgen_ret(N); 286 patch(p, pc); 287 } 288 break; 289 } 290 } 291 292 /* 293 * n is call to interface method. 294 * generate res = n. 295 */ 296 void 297 cgen_callinter(Node *n, Node *res, int proc) 298 { 299 Node *i, *f; 300 Node tmpi, nodi, nodo, nodr, nodsp; 301 Prog *p; 302 303 i = n->left; 304 if(i->op != ODOTINTER) 305 fatal("cgen_callinter: not ODOTINTER %O", i->op); 306 307 f = i->right; // field 308 if(f->op != ONAME) 309 fatal("cgen_callinter: not ONAME %O", f->op); 310 311 i = i->left; // interface 312 313 if(!i->addable) { 314 tempname(&tmpi, i->type); 315 cgen(i, &tmpi); 316 i = &tmpi; 317 } 318 319 genlist(n->list); // assign the args 320 321 // i is now addable, prepare an indirected 322 // register to hold its address. 323 igen(i, &nodi, res); // REG = &inter 324 325 nodindreg(&nodsp, types[tptr], D_R0+REGSP); 326 nodsp.xoffset = widthptr; 327 nodi.type = types[tptr]; 328 nodi.xoffset += widthptr; 329 cgen(&nodi, &nodsp); // 0(SP) = 8(REG) -- i.data 330 331 regalloc(&nodo, types[tptr], res); 332 nodi.type = types[tptr]; 333 nodi.xoffset -= widthptr; 334 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 335 regfree(&nodi); 336 337 regalloc(&nodr, types[tptr], &nodo); 338 if(n->left->xoffset == BADWIDTH) 339 fatal("cgen_callinter: badwidth"); 340 cgen_checknil(&nodo); // in case offset is huge 341 nodo.op = OINDREG; 342 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 343 if(proc == 0) { 344 // plain call: use direct c function pointer - more efficient 345 cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f] 346 proc = 3; 347 } else { 348 // go/defer. generate go func value. 349 p = gins(AMOVD, &nodo, &nodr); // REG = &(32+offset(REG)) -- i.tab->fun[f] 350 p->from.type = D_CONST; 351 } 352 353 nodr.type = n->left->type; 354 ginscall(&nodr, proc); 355 356 regfree(&nodr); 357 regfree(&nodo); 358 } 359 360 /* 361 * generate function call; 362 * proc=0 normal call 363 * proc=1 goroutine run in new proc 364 * proc=2 defer call save away stack 365 */ 366 void 367 cgen_call(Node *n, int proc) 368 { 369 Type *t; 370 Node nod, afun; 371 372 if(n == N) 373 return; 374 375 if(n->left->ullman >= UINF) { 376 // if name involves a fn call 377 // precompute the address of the fn 378 tempname(&afun, types[tptr]); 379 cgen(n->left, &afun); 380 } 381 382 genlist(n->list); // assign the args 383 t = n->left->type; 384 385 // call tempname pointer 386 if(n->left->ullman >= UINF) { 387 regalloc(&nod, types[tptr], N); 388 cgen_as(&nod, &afun); 389 nod.type = t; 390 ginscall(&nod, proc); 391 regfree(&nod); 392 return; 393 } 394 395 // call pointer 396 if(n->left->op != ONAME || n->left->class != PFUNC) { 397 regalloc(&nod, types[tptr], N); 398 cgen_as(&nod, n->left); 399 nod.type = t; 400 ginscall(&nod, proc); 401 regfree(&nod); 402 return; 403 } 404 405 // call direct 406 n->left->method = 1; 407 ginscall(n->left, proc); 408 } 409 410 /* 411 * call to n has already been generated. 412 * generate: 413 * res = return value from call. 414 */ 415 void 416 cgen_callret(Node *n, Node *res) 417 { 418 Node nod; 419 Type *fp, *t; 420 Iter flist; 421 422 t = n->left->type; 423 if(t->etype == TPTR32 || t->etype == TPTR64) 424 t = t->type; 425 426 fp = structfirst(&flist, getoutarg(t)); 427 if(fp == T) 428 fatal("cgen_callret: nil"); 429 430 memset(&nod, 0, sizeof(nod)); 431 nod.op = OINDREG; 432 nod.val.u.reg = D_R0+REGSP; 433 nod.addable = 1; 434 435 nod.xoffset = fp->width + widthptr; // +widthptr: saved LR at 0(R1) 436 nod.type = fp->type; 437 cgen_as(res, &nod); 438 } 439 440 /* 441 * call to n has already been generated. 442 * generate: 443 * res = &return value from call. 444 */ 445 void 446 cgen_aret(Node *n, Node *res) 447 { 448 Node nod1, nod2; 449 Type *fp, *t; 450 Iter flist; 451 452 t = n->left->type; 453 if(isptr[t->etype]) 454 t = t->type; 455 456 fp = structfirst(&flist, getoutarg(t)); 457 if(fp == T) 458 fatal("cgen_aret: nil"); 459 460 memset(&nod1, 0, sizeof(nod1)); 461 nod1.op = OINDREG; 462 nod1.val.u.reg = D_R0 + REGSP; 463 nod1.addable = 1; 464 465 nod1.xoffset = fp->width + widthptr; // +widthptr: saved lr at 0(SP) 466 nod1.type = fp->type; 467 468 if(res->op != OREGISTER) { 469 regalloc(&nod2, types[tptr], res); 470 agen(&nod1, &nod2); 471 gins(AMOVD, &nod2, res); 472 regfree(&nod2); 473 } else 474 agen(&nod1, res); 475 } 476 477 /* 478 * generate return. 479 * n->left is assignments to return values. 480 */ 481 void 482 cgen_ret(Node *n) 483 { 484 Prog *p; 485 486 if(n != N) 487 genlist(n->list); // copy out args 488 if(hasdefer) 489 ginscall(deferreturn, 0); 490 genlist(curfn->exit); 491 p = gins(ARET, N, N); 492 if(n != N && n->op == ORETJMP) { 493 p->to.name = D_EXTERN; 494 p->to.type = D_CONST; 495 p->to.sym = linksym(n->left->sym); 496 } 497 } 498 499 void 500 cgen_asop(Node *n) 501 { 502 USED(n); 503 fatal("cgen_asop"); // no longer used 504 } 505 506 int 507 samereg(Node *a, Node *b) 508 { 509 if(a == N || b == N) 510 return 0; 511 if(a->op != OREGISTER) 512 return 0; 513 if(b->op != OREGISTER) 514 return 0; 515 if(a->val.u.reg != b->val.u.reg) 516 return 0; 517 return 1; 518 } 519 520 /* 521 * generate division. 522 * generates one of: 523 * res = nl / nr 524 * res = nl % nr 525 * according to op. 526 */ 527 void 528 dodiv(int op, Node *nl, Node *nr, Node *res) 529 { 530 int a, check; 531 Type *t, *t0; 532 Node tl, tr, tl2, tr2, nm1, nz, tm; 533 Prog *p1, *p2; 534 535 // Have to be careful about handling 536 // most negative int divided by -1 correctly. 537 // The hardware will generate undefined result. 538 // Also need to explicitly trap on division on zero, 539 // the hardware will silently generate undefined result. 540 // DIVW will leave unpredicable result in higher 32-bit, 541 // so always use DIVD/DIVDU. 542 t = nl->type; 543 t0 = t; 544 check = 0; 545 if(issigned[t->etype]) { 546 check = 1; 547 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1))) 548 check = 0; 549 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 550 check = 0; 551 } 552 if(t->width < 8) { 553 if(issigned[t->etype]) 554 t = types[TINT64]; 555 else 556 t = types[TUINT64]; 557 check = 0; 558 } 559 560 a = optoas(ODIV, t); 561 562 regalloc(&tl, t0, N); 563 regalloc(&tr, t0, N); 564 if(nl->ullman >= nr->ullman) { 565 cgen(nl, &tl); 566 cgen(nr, &tr); 567 } else { 568 cgen(nr, &tr); 569 cgen(nl, &tl); 570 } 571 if(t != t0) { 572 // Convert 573 tl2 = tl; 574 tr2 = tr; 575 tl.type = t; 576 tr.type = t; 577 gmove(&tl2, &tl); 578 gmove(&tr2, &tr); 579 } 580 581 // Handle divide-by-zero panic. 582 p1 = gins(optoas(OCMP, t), &tr, N); 583 p1->to.type = D_REG; 584 p1->to.reg = REGZERO; 585 p1 = gbranch(optoas(ONE, t), T, +1); 586 if(panicdiv == N) 587 panicdiv = sysfunc("panicdivide"); 588 ginscall(panicdiv, -1); 589 patch(p1, pc); 590 591 if(check) { 592 nodconst(&nm1, t, -1); 593 gins(optoas(OCMP, t), &tr, &nm1); 594 p1 = gbranch(optoas(ONE, t), T, +1); 595 if(op == ODIV) { 596 // a / (-1) is -a. 597 gins(optoas(OMINUS, t), N, &tl); 598 gmove(&tl, res); 599 } else { 600 // a % (-1) is 0. 601 nodconst(&nz, t, 0); 602 gmove(&nz, res); 603 } 604 p2 = gbranch(AJMP, T, 0); 605 patch(p1, pc); 606 } 607 p1 = gins(a, &tr, &tl); 608 if(op == ODIV) { 609 regfree(&tr); 610 gmove(&tl, res); 611 } else { 612 // A%B = A-(A/B*B) 613 regalloc(&tm, t, N); 614 // patch div to use the 3 register form 615 // TODO(minux): add gins3? 616 p1->reg = p1->to.reg; 617 p1->to.reg = tm.val.u.reg; 618 gins(optoas(OMUL, t), &tr, &tm); 619 regfree(&tr); 620 gins(optoas(OSUB, t), &tm, &tl); 621 regfree(&tm); 622 gmove(&tl, res); 623 } 624 regfree(&tl); 625 if(check) 626 patch(p2, pc); 627 } 628 629 /* 630 * generate division according to op, one of: 631 * res = nl / nr 632 * res = nl % nr 633 */ 634 void 635 cgen_div(int op, Node *nl, Node *nr, Node *res) 636 { 637 Node n1, n2, n3; 638 int w, a; 639 Magic m; 640 641 // TODO(minux): enable division by magic multiply (also need to fix longmod below) 642 //if(nr->op != OLITERAL) 643 goto longdiv; 644 w = nl->type->width*8; 645 646 // Front end handled 32-bit division. We only need to handle 64-bit. 647 // try to do division by multiply by (2^w)/d 648 // see hacker's delight chapter 10 649 switch(simtype[nl->type->etype]) { 650 default: 651 goto longdiv; 652 653 case TUINT64: 654 m.w = w; 655 m.ud = mpgetfix(nr->val.u.xval); 656 umagic(&m); 657 if(m.bad) 658 break; 659 if(op == OMOD) 660 goto longmod; 661 662 cgenr(nl, &n1, N); 663 nodconst(&n2, nl->type, m.um); 664 regalloc(&n3, nl->type, res); 665 cgen_hmul(&n1, &n2, &n3); 666 667 if(m.ua) { 668 // need to add numerator accounting for overflow 669 gins(optoas(OADD, nl->type), &n1, &n3); 670 nodconst(&n2, nl->type, 1); 671 gins(optoas(ORROTC, nl->type), &n2, &n3); 672 nodconst(&n2, nl->type, m.s-1); 673 gins(optoas(ORSH, nl->type), &n2, &n3); 674 } else { 675 nodconst(&n2, nl->type, m.s); 676 gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx 677 } 678 679 gmove(&n3, res); 680 regfree(&n1); 681 regfree(&n3); 682 return; 683 684 case TINT64: 685 m.w = w; 686 m.sd = mpgetfix(nr->val.u.xval); 687 smagic(&m); 688 if(m.bad) 689 break; 690 if(op == OMOD) 691 goto longmod; 692 693 cgenr(nl, &n1, res); 694 nodconst(&n2, nl->type, m.sm); 695 regalloc(&n3, nl->type, N); 696 cgen_hmul(&n1, &n2, &n3); 697 698 if(m.sm < 0) { 699 // need to add numerator 700 gins(optoas(OADD, nl->type), &n1, &n3); 701 } 702 703 nodconst(&n2, nl->type, m.s); 704 gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3 705 706 nodconst(&n2, nl->type, w-1); 707 gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg 708 gins(optoas(OSUB, nl->type), &n1, &n3); // added 709 710 if(m.sd < 0) { 711 // this could probably be removed 712 // by factoring it into the multiplier 713 gins(optoas(OMINUS, nl->type), N, &n3); 714 } 715 716 gmove(&n3, res); 717 regfree(&n1); 718 regfree(&n3); 719 return; 720 } 721 goto longdiv; 722 723 longdiv: 724 // division and mod using (slow) hardware instruction 725 dodiv(op, nl, nr, res); 726 return; 727 728 longmod: 729 // mod using formula A%B = A-(A/B*B) but 730 // we know that there is a fast algorithm for A/B 731 regalloc(&n1, nl->type, res); 732 cgen(nl, &n1); 733 regalloc(&n2, nl->type, N); 734 cgen_div(ODIV, &n1, nr, &n2); 735 a = optoas(OMUL, nl->type); 736 if(w == 8) { 737 // use 2-operand 16-bit multiply 738 // because there is no 2-operand 8-bit multiply 739 //a = AIMULW; 740 } 741 if(!smallintconst(nr)) { 742 regalloc(&n3, nl->type, N); 743 cgen(nr, &n3); 744 gins(a, &n3, &n2); 745 regfree(&n3); 746 } else 747 gins(a, nr, &n2); 748 gins(optoas(OSUB, nl->type), &n2, &n1); 749 gmove(&n1, res); 750 regfree(&n1); 751 regfree(&n2); 752 } 753 754 /* 755 * generate high multiply: 756 * res = (nl*nr) >> width 757 */ 758 void 759 cgen_hmul(Node *nl, Node *nr, Node *res) 760 { 761 int w; 762 Node n1, n2, *tmp; 763 Type *t; 764 Prog *p; 765 766 // largest ullman on left. 767 if(nl->ullman < nr->ullman) { 768 tmp = nl; 769 nl = nr; 770 nr = tmp; 771 } 772 t = nl->type; 773 w = t->width * 8; 774 cgenr(nl, &n1, res); 775 cgenr(nr, &n2, N); 776 switch(simtype[t->etype]) { 777 case TINT8: 778 case TINT16: 779 case TINT32: 780 gins(optoas(OMUL, t), &n2, &n1); 781 p = gins(ASRAD, N, &n1); 782 p->from.type = D_CONST; 783 p->from.offset = w; 784 break; 785 case TUINT8: 786 case TUINT16: 787 case TUINT32: 788 gins(optoas(OMUL, t), &n2, &n1); 789 p = gins(ASRD, N, &n1); 790 p->from.type = D_CONST; 791 p->from.offset = w; 792 break; 793 case TINT64: 794 case TUINT64: 795 if(issigned[t->etype]) 796 p = gins(AMULHD, &n2, &n1); 797 else 798 p = gins(AMULHDU, &n2, &n1); 799 break; 800 default: 801 fatal("cgen_hmul %T", t); 802 break; 803 } 804 cgen(&n1, res); 805 regfree(&n1); 806 regfree(&n2); 807 } 808 809 /* 810 * generate shift according to op, one of: 811 * res = nl << nr 812 * res = nl >> nr 813 */ 814 void 815 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 816 { 817 Node n1, n2, n3, n4, n5; 818 int a; 819 Prog *p1; 820 uvlong sc; 821 Type *tcount; 822 823 a = optoas(op, nl->type); 824 825 if(nr->op == OLITERAL) { 826 regalloc(&n1, nl->type, res); 827 cgen(nl, &n1); 828 sc = mpgetfix(nr->val.u.xval); 829 if(sc >= nl->type->width*8) { 830 // large shift gets 2 shifts by width-1 831 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 832 gins(a, &n3, &n1); 833 gins(a, &n3, &n1); 834 } else 835 gins(a, nr, &n1); 836 gmove(&n1, res); 837 regfree(&n1); 838 goto ret; 839 } 840 841 if(nl->ullman >= UINF) { 842 tempname(&n4, nl->type); 843 cgen(nl, &n4); 844 nl = &n4; 845 } 846 if(nr->ullman >= UINF) { 847 tempname(&n5, nr->type); 848 cgen(nr, &n5); 849 nr = &n5; 850 } 851 852 // Allow either uint32 or uint64 as shift type, 853 // to avoid unnecessary conversion from uint32 to uint64 854 // just to do the comparison. 855 tcount = types[simtype[nr->type->etype]]; 856 if(tcount->etype < TUINT32) 857 tcount = types[TUINT32]; 858 859 regalloc(&n1, nr->type, N); // to hold the shift type in CX 860 regalloc(&n3, tcount, &n1); // to clear high bits of CX 861 862 regalloc(&n2, nl->type, res); 863 if(nl->ullman >= nr->ullman) { 864 cgen(nl, &n2); 865 cgen(nr, &n1); 866 gmove(&n1, &n3); 867 } else { 868 cgen(nr, &n1); 869 gmove(&n1, &n3); 870 cgen(nl, &n2); 871 } 872 regfree(&n3); 873 874 // test and fix up large shifts 875 if(!bounded) { 876 nodconst(&n3, tcount, nl->type->width*8); 877 gins(optoas(OCMP, tcount), &n1, &n3); 878 p1 = gbranch(optoas(OLT, tcount), T, +1); 879 if(op == ORSH && issigned[nl->type->etype]) { 880 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 881 gins(a, &n3, &n2); 882 } else { 883 nodconst(&n3, nl->type, 0); 884 gmove(&n3, &n2); 885 } 886 patch(p1, pc); 887 } 888 889 gins(a, &n1, &n2); 890 891 gmove(&n2, res); 892 893 regfree(&n1); 894 regfree(&n2); 895 896 ret: 897 ; 898 } 899 900 void 901 clearfat(Node *nl) 902 { 903 uint64 w, c, q, t, boff; 904 Node dst, end, r0, *f; 905 Prog *p, *pl; 906 907 /* clear a fat object */ 908 if(debug['g']) { 909 print("clearfat %N (%T, size: %lld)\n", nl, nl->type, nl->type->width); 910 } 911 912 w = nl->type->width; 913 // Avoid taking the address for simple enough types. 914 //if(componentgen(N, nl)) 915 // return; 916 917 c = w % 8; // bytes 918 q = w / 8; // dwords 919 920 if(reg[REGRT1] > 0) 921 fatal("R%d in use during clearfat", REGRT1); 922 923 nodreg(&r0, types[TUINT64], 0); // r0 is always zero 924 nodreg(&dst, types[tptr], D_R0+REGRT1); 925 reg[REGRT1]++; 926 agen(nl, &dst); 927 928 if(q > 128) { 929 p = gins(ASUB, N, &dst); 930 p->from.type = D_CONST; 931 p->from.offset = 8; 932 933 regalloc(&end, types[tptr], N); 934 p = gins(AMOVD, &dst, &end); 935 p->from.type = D_CONST; 936 p->from.offset = q*8; 937 938 p = gins(AMOVDU, &r0, &dst); 939 p->to.type = D_OREG; 940 p->to.offset = 8; 941 pl = p; 942 943 p = gins(ACMP, &dst, &end); 944 patch(gbranch(ABNE, T, 0), pl); 945 946 regfree(&end); 947 // The loop leaves R3 on the last zeroed dword 948 boff = 8; 949 } else if(q >= 4) { 950 p = gins(ASUB, N, &dst); 951 p->from.type = D_CONST; 952 p->from.offset = 8; 953 f = sysfunc("duffzero"); 954 p = gins(ADUFFZERO, N, f); 955 afunclit(&p->to, f); 956 // 4 and 128 = magic constants: see ../../runtime/asm_ppc64x.s 957 p->to.offset = 4*(128-q); 958 // duffzero leaves R3 on the last zeroed dword 959 boff = 8; 960 } else { 961 for(t = 0; t < q; t++) { 962 p = gins(AMOVD, &r0, &dst); 963 p->to.type = D_OREG; 964 p->to.offset = 8*t; 965 } 966 boff = 8*q; 967 } 968 969 for(t = 0; t < c; t++) { 970 p = gins(AMOVB, &r0, &dst); 971 p->to.type = D_OREG; 972 p->to.offset = t+boff; 973 } 974 reg[REGRT1]--; 975 } 976 977 // Called after regopt and peep have run. 978 // Expand CHECKNIL pseudo-op into actual nil pointer check. 979 void 980 expandchecks(Prog *firstp) 981 { 982 Prog *p, *p1, *p2; 983 984 for(p = firstp; p != P; p = p->link) { 985 if(debug_checknil && ctxt->debugvlog) 986 print("expandchecks: %P\n", p); 987 if(p->as != ACHECKNIL) 988 continue; 989 if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers 990 warnl(p->lineno, "generated nil check"); 991 if(p->from.type != D_REG) 992 fatal("invalid nil check %P\n", p); 993 /* 994 // check is 995 // TD $4, R0, arg (R0 is always zero) 996 // eqv. to: 997 // tdeq r0, arg 998 // NOTE: this needs special runtime support to make SIGTRAP recoverable. 999 reg = p->from.reg; 1000 p->as = ATD; 1001 p->from = p->to = p->from3 = zprog.from; 1002 p->from.type = D_CONST; 1003 p->from.offset = 4; 1004 p->from.reg = NREG; 1005 p->reg = 0; 1006 p->to.type = D_REG; 1007 p->to.reg = reg; 1008 */ 1009 // check is 1010 // CMP arg, R0 1011 // BNE 2(PC) [likely] 1012 // MOVD R0, 0(R0) 1013 p1 = mal(sizeof *p1); 1014 p2 = mal(sizeof *p2); 1015 clearp(p1); 1016 clearp(p2); 1017 p1->link = p2; 1018 p2->link = p->link; 1019 p->link = p1; 1020 p1->lineno = p->lineno; 1021 p2->lineno = p->lineno; 1022 p1->pc = 9999; 1023 p2->pc = 9999; 1024 p->as = ACMP; 1025 p->to.type = D_REG; 1026 p->to.reg = REGZERO; 1027 p1->as = ABNE; 1028 //p1->from.type = D_CONST; 1029 //p1->from.offset = 1; // likely 1030 p1->to.type = D_BRANCH; 1031 p1->to.u.branch = p2->link; 1032 // crash by write to memory address 0. 1033 p2->as = AMOVD; 1034 p2->from.type = D_REG; 1035 p2->from.reg = 0; 1036 p2->to.type = D_OREG; 1037 p2->to.reg = 0; 1038 p2->to.offset = 0; 1039 } 1040 }