github.com/reiver/go@v0.0.0-20150109200633-1d0c7792f172/src/cmd/6g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 static Prog *appendpp(Prog*, int, int, vlong, int, vlong); 13 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); 14 15 void 16 defframe(Prog *ptxt) 17 { 18 uint32 frame, ax; 19 Prog *p; 20 vlong hi, lo; 21 NodeList *l; 22 Node *n; 23 24 // fill in argument size 25 ptxt->to.offset = rnd(curfn->type->argwid, widthptr); 26 27 // fill in final stack size 28 ptxt->to.offset <<= 32; 29 frame = rnd(stksize+maxarg, widthreg); 30 ptxt->to.offset |= frame; 31 32 // insert code to zero ambiguously live variables 33 // so that the garbage collector only sees initialized values 34 // when it looks for pointers. 35 p = ptxt; 36 lo = hi = 0; 37 ax = 0; 38 // iterate through declarations - they are sorted in decreasing xoffset order. 39 for(l=curfn->dcl; l != nil; l = l->next) { 40 n = l->n; 41 if(!n->needzero) 42 continue; 43 if(n->class != PAUTO) 44 fatal("needzero class %d", n->class); 45 if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) 46 fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); 47 48 if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) { 49 // merge with range we already have 50 lo = n->xoffset; 51 continue; 52 } 53 // zero old range 54 p = zerorange(p, frame, lo, hi, &ax); 55 56 // set new range 57 hi = n->xoffset + n->type->width; 58 lo = n->xoffset; 59 } 60 // zero final range 61 zerorange(p, frame, lo, hi, &ax); 62 } 63 64 static Prog* 65 zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax) 66 { 67 vlong cnt, i; 68 69 cnt = hi - lo; 70 if(cnt == 0) 71 return p; 72 if(*ax == 0) { 73 p = appendpp(p, AMOVQ, D_CONST, 0, D_AX, 0); 74 *ax = 1; 75 } 76 if(cnt % widthreg != 0) { 77 // should only happen with nacl 78 if(cnt % widthptr != 0) 79 fatal("zerorange count not a multiple of widthptr %d", cnt); 80 p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo); 81 lo += widthptr; 82 cnt -= widthptr; 83 } 84 if(cnt <= 4*widthreg) { 85 for(i = 0; i < cnt; i += widthreg) { 86 p = appendpp(p, AMOVQ, D_AX, 0, D_SP+D_INDIR, frame+lo+i); 87 } 88 } else if(!nacl && (cnt <= 128*widthreg)) { 89 p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0); 90 p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 2*(128-cnt/widthreg)); 91 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 92 } else { 93 p = appendpp(p, AMOVQ, D_CONST, cnt/widthreg, D_CX, 0); 94 p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0); 95 p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0); 96 p = appendpp(p, ASTOSQ, D_NONE, 0, D_NONE, 0); 97 } 98 return p; 99 } 100 101 static Prog* 102 appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset) 103 { 104 Prog *q; 105 q = mal(sizeof(*q)); 106 clearp(q); 107 q->as = as; 108 q->lineno = p->lineno; 109 q->from.type = ftype; 110 q->from.offset = foffset; 111 q->to.type = ttype; 112 q->to.offset = toffset; 113 q->link = p->link; 114 p->link = q; 115 return q; 116 } 117 118 // Sweep the prog list to mark any used nodes. 119 void 120 markautoused(Prog* p) 121 { 122 for (; p; p = p->link) { 123 if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL) 124 continue; 125 126 if (p->from.node) 127 p->from.node->used = 1; 128 129 if (p->to.node) 130 p->to.node->used = 1; 131 } 132 } 133 134 // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 135 void 136 fixautoused(Prog *p) 137 { 138 Prog **lp; 139 140 for (lp=&p; (p=*lp) != P; ) { 141 if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { 142 *lp = p->link; 143 continue; 144 } 145 if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) { 146 // Cannot remove VARDEF instruction, because - unlike TYPE handled above - 147 // VARDEFs are interspersed with other code, and a jump might be using the 148 // VARDEF as a target. Replace with a no-op instead. A later pass will remove 149 // the no-ops. 150 p->to.type = D_NONE; 151 p->to.node = N; 152 p->as = ANOP; 153 continue; 154 } 155 if (p->from.type == D_AUTO && p->from.node) 156 p->from.offset += p->from.node->stkdelta; 157 158 if (p->to.type == D_AUTO && p->to.node) 159 p->to.offset += p->to.node->stkdelta; 160 161 lp = &p->link; 162 } 163 } 164 165 166 /* 167 * generate: 168 * call f 169 * proc=-1 normal call but no return 170 * proc=0 normal call 171 * proc=1 goroutine run in new proc 172 * proc=2 defer call save away stack 173 * proc=3 normal call to C pointer (not Go func value) 174 */ 175 void 176 ginscall(Node *f, int proc) 177 { 178 Prog *p; 179 Node reg, stk; 180 Node r1; 181 int32 extra; 182 183 if(f->type != T) { 184 extra = 0; 185 if(proc == 1 || proc == 2) 186 extra = 2 * widthptr; 187 setmaxarg(f->type, extra); 188 } 189 190 switch(proc) { 191 default: 192 fatal("ginscall: bad proc %d", proc); 193 break; 194 195 case 0: // normal call 196 case -1: // normal call but no return 197 if(f->op == ONAME && f->class == PFUNC) { 198 if(f == deferreturn) { 199 // Deferred calls will appear to be returning to 200 // the CALL deferreturn(SB) that we are about to emit. 201 // However, the stack trace code will show the line 202 // of the instruction byte before the return PC. 203 // To avoid that being an unrelated instruction, 204 // insert an x86 NOP that we will have the right line number. 205 // x86 NOP 0x90 is really XCHG AX, AX; use that description 206 // because the NOP pseudo-instruction would be removed by 207 // the linker. 208 nodreg(®, types[TINT], D_AX); 209 gins(AXCHGL, ®, ®); 210 } 211 p = gins(ACALL, N, f); 212 afunclit(&p->to, f); 213 if(proc == -1 || noreturn(p)) 214 gins(AUNDEF, N, N); 215 break; 216 } 217 nodreg(®, types[tptr], D_DX); 218 nodreg(&r1, types[tptr], D_BX); 219 gmove(f, ®); 220 reg.op = OINDREG; 221 gmove(®, &r1); 222 reg.op = OREGISTER; 223 gins(ACALL, ®, &r1); 224 break; 225 226 case 3: // normal call of c function pointer 227 gins(ACALL, N, f); 228 break; 229 230 case 1: // call in new proc (go) 231 case 2: // deferred call (defer) 232 memset(&stk, 0, sizeof(stk)); 233 stk.op = OINDREG; 234 stk.val.u.reg = D_SP; 235 stk.xoffset = 0; 236 237 if(widthptr == 8) { 238 // size of arguments at 0(SP) 239 ginscon(AMOVQ, argsize(f->type), &stk); 240 241 // FuncVal* at 8(SP) 242 stk.xoffset = widthptr; 243 nodreg(®, types[TINT64], D_AX); 244 gmove(f, ®); 245 gins(AMOVQ, ®, &stk); 246 } else { 247 // size of arguments at 0(SP) 248 ginscon(AMOVL, argsize(f->type), &stk); 249 250 // FuncVal* at 4(SP) 251 stk.xoffset = widthptr; 252 nodreg(®, types[TINT32], D_AX); 253 gmove(f, ®); 254 gins(AMOVL, ®, &stk); 255 } 256 257 if(proc == 1) 258 ginscall(newproc, 0); 259 else { 260 if(!hasdefer) 261 fatal("hasdefer=0 but has defer"); 262 ginscall(deferproc, 0); 263 } 264 if(proc == 2) { 265 nodreg(®, types[TINT32], D_AX); 266 gins(ATESTL, ®, ®); 267 p = gbranch(AJEQ, T, +1); 268 cgen_ret(N); 269 patch(p, pc); 270 } 271 break; 272 } 273 } 274 275 /* 276 * n is call to interface method. 277 * generate res = n. 278 */ 279 void 280 cgen_callinter(Node *n, Node *res, int proc) 281 { 282 Node *i, *f; 283 Node tmpi, nodi, nodo, nodr, nodsp; 284 285 i = n->left; 286 if(i->op != ODOTINTER) 287 fatal("cgen_callinter: not ODOTINTER %O", i->op); 288 289 f = i->right; // field 290 if(f->op != ONAME) 291 fatal("cgen_callinter: not ONAME %O", f->op); 292 293 i = i->left; // interface 294 295 if(!i->addable) { 296 tempname(&tmpi, i->type); 297 cgen(i, &tmpi); 298 i = &tmpi; 299 } 300 301 genlist(n->list); // assign the args 302 303 // i is now addable, prepare an indirected 304 // register to hold its address. 305 igen(i, &nodi, res); // REG = &inter 306 307 nodindreg(&nodsp, types[tptr], D_SP); 308 nodsp.xoffset = 0; 309 if(proc != 0) 310 nodsp.xoffset += 2 * widthptr; // leave room for size & fn 311 nodi.type = types[tptr]; 312 nodi.xoffset += widthptr; 313 cgen(&nodi, &nodsp); // {0, 8(nacl), or 16}(SP) = 8(REG) -- i.data 314 315 regalloc(&nodo, types[tptr], res); 316 nodi.type = types[tptr]; 317 nodi.xoffset -= widthptr; 318 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 319 regfree(&nodi); 320 321 regalloc(&nodr, types[tptr], &nodo); 322 if(n->left->xoffset == BADWIDTH) 323 fatal("cgen_callinter: badwidth"); 324 cgen_checknil(&nodo); // in case offset is huge 325 nodo.op = OINDREG; 326 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 327 if(proc == 0) { 328 // plain call: use direct c function pointer - more efficient 329 cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f] 330 proc = 3; 331 } else { 332 // go/defer. generate go func value. 333 gins(ALEAQ, &nodo, &nodr); // REG = &(32+offset(REG)) -- i.tab->fun[f] 334 } 335 336 nodr.type = n->left->type; 337 ginscall(&nodr, proc); 338 339 regfree(&nodr); 340 regfree(&nodo); 341 } 342 343 /* 344 * generate function call; 345 * proc=0 normal call 346 * proc=1 goroutine run in new proc 347 * proc=2 defer call save away stack 348 */ 349 void 350 cgen_call(Node *n, int proc) 351 { 352 Type *t; 353 Node nod, afun; 354 355 if(n == N) 356 return; 357 358 if(n->left->ullman >= UINF) { 359 // if name involves a fn call 360 // precompute the address of the fn 361 tempname(&afun, types[tptr]); 362 cgen(n->left, &afun); 363 } 364 365 genlist(n->list); // assign the args 366 t = n->left->type; 367 368 // call tempname pointer 369 if(n->left->ullman >= UINF) { 370 regalloc(&nod, types[tptr], N); 371 cgen_as(&nod, &afun); 372 nod.type = t; 373 ginscall(&nod, proc); 374 regfree(&nod); 375 return; 376 } 377 378 // call pointer 379 if(n->left->op != ONAME || n->left->class != PFUNC) { 380 regalloc(&nod, types[tptr], N); 381 cgen_as(&nod, n->left); 382 nod.type = t; 383 ginscall(&nod, proc); 384 regfree(&nod); 385 return; 386 } 387 388 // call direct 389 n->left->method = 1; 390 ginscall(n->left, proc); 391 } 392 393 /* 394 * call to n has already been generated. 395 * generate: 396 * res = return value from call. 397 */ 398 void 399 cgen_callret(Node *n, Node *res) 400 { 401 Node nod; 402 Type *fp, *t; 403 Iter flist; 404 405 t = n->left->type; 406 if(t->etype == TPTR32 || t->etype == TPTR64) 407 t = t->type; 408 409 fp = structfirst(&flist, getoutarg(t)); 410 if(fp == T) 411 fatal("cgen_callret: nil"); 412 413 memset(&nod, 0, sizeof(nod)); 414 nod.op = OINDREG; 415 nod.val.u.reg = D_SP; 416 nod.addable = 1; 417 418 nod.xoffset = fp->width; 419 nod.type = fp->type; 420 cgen_as(res, &nod); 421 } 422 423 /* 424 * call to n has already been generated. 425 * generate: 426 * res = &return value from call. 427 */ 428 void 429 cgen_aret(Node *n, Node *res) 430 { 431 Node nod1, nod2; 432 Type *fp, *t; 433 Iter flist; 434 435 t = n->left->type; 436 if(isptr[t->etype]) 437 t = t->type; 438 439 fp = structfirst(&flist, getoutarg(t)); 440 if(fp == T) 441 fatal("cgen_aret: nil"); 442 443 memset(&nod1, 0, sizeof(nod1)); 444 nod1.op = OINDREG; 445 nod1.val.u.reg = D_SP; 446 nod1.addable = 1; 447 448 nod1.xoffset = fp->width; 449 nod1.type = fp->type; 450 451 if(res->op != OREGISTER) { 452 regalloc(&nod2, types[tptr], res); 453 gins(leaptr, &nod1, &nod2); 454 gins(movptr, &nod2, res); 455 regfree(&nod2); 456 } else 457 gins(leaptr, &nod1, res); 458 } 459 460 /* 461 * generate return. 462 * n->left is assignments to return values. 463 */ 464 void 465 cgen_ret(Node *n) 466 { 467 Prog *p; 468 469 if(n != N) 470 genlist(n->list); // copy out args 471 if(hasdefer) 472 ginscall(deferreturn, 0); 473 genlist(curfn->exit); 474 p = gins(ARET, N, N); 475 if(n != N && n->op == ORETJMP) { 476 p->to.type = D_EXTERN; 477 p->to.sym = linksym(n->left->sym); 478 } 479 } 480 481 /* 482 * generate += *= etc. 483 */ 484 void 485 cgen_asop(Node *n) 486 { 487 Node n1, n2, n3, n4; 488 Node *nl, *nr; 489 Prog *p1; 490 Addr addr; 491 int a; 492 493 nl = n->left; 494 nr = n->right; 495 496 if(nr->ullman >= UINF && nl->ullman >= UINF) { 497 tempname(&n1, nr->type); 498 cgen(nr, &n1); 499 n2 = *n; 500 n2.right = &n1; 501 cgen_asop(&n2); 502 goto ret; 503 } 504 505 if(!isint[nl->type->etype]) 506 goto hard; 507 if(!isint[nr->type->etype]) 508 goto hard; 509 510 switch(n->etype) { 511 case OADD: 512 if(smallintconst(nr)) 513 if(mpgetfix(nr->val.u.xval) == 1) { 514 a = optoas(OINC, nl->type); 515 if(nl->addable) { 516 gins(a, N, nl); 517 goto ret; 518 } 519 if(sudoaddable(a, nl, &addr)) { 520 p1 = gins(a, N, N); 521 p1->to = addr; 522 sudoclean(); 523 goto ret; 524 } 525 } 526 break; 527 528 case OSUB: 529 if(smallintconst(nr)) 530 if(mpgetfix(nr->val.u.xval) == 1) { 531 a = optoas(ODEC, nl->type); 532 if(nl->addable) { 533 gins(a, N, nl); 534 goto ret; 535 } 536 if(sudoaddable(a, nl, &addr)) { 537 p1 = gins(a, N, N); 538 p1->to = addr; 539 sudoclean(); 540 goto ret; 541 } 542 } 543 break; 544 } 545 546 switch(n->etype) { 547 case OADD: 548 case OSUB: 549 case OXOR: 550 case OAND: 551 case OOR: 552 a = optoas(n->etype, nl->type); 553 if(nl->addable) { 554 if(smallintconst(nr)) { 555 gins(a, nr, nl); 556 goto ret; 557 } 558 regalloc(&n2, nr->type, N); 559 cgen(nr, &n2); 560 gins(a, &n2, nl); 561 regfree(&n2); 562 goto ret; 563 } 564 if(nr->ullman < UINF) 565 if(sudoaddable(a, nl, &addr)) { 566 if(smallintconst(nr)) { 567 p1 = gins(a, nr, N); 568 p1->to = addr; 569 sudoclean(); 570 goto ret; 571 } 572 regalloc(&n2, nr->type, N); 573 cgen(nr, &n2); 574 p1 = gins(a, &n2, N); 575 p1->to = addr; 576 regfree(&n2); 577 sudoclean(); 578 goto ret; 579 } 580 } 581 582 hard: 583 n2.op = 0; 584 n1.op = 0; 585 if(nr->op == OLITERAL) { 586 // don't allocate a register for literals. 587 } else if(nr->ullman >= nl->ullman || nl->addable) { 588 regalloc(&n2, nr->type, N); 589 cgen(nr, &n2); 590 nr = &n2; 591 } else { 592 tempname(&n2, nr->type); 593 cgen(nr, &n2); 594 nr = &n2; 595 } 596 if(!nl->addable) { 597 igen(nl, &n1, N); 598 nl = &n1; 599 } 600 601 n3 = *n; 602 n3.left = nl; 603 n3.right = nr; 604 n3.op = n->etype; 605 606 regalloc(&n4, nl->type, N); 607 cgen(&n3, &n4); 608 gmove(&n4, nl); 609 610 if(n1.op) 611 regfree(&n1); 612 if(n2.op == OREGISTER) 613 regfree(&n2); 614 regfree(&n4); 615 616 ret: 617 ; 618 } 619 620 int 621 samereg(Node *a, Node *b) 622 { 623 if(a == N || b == N) 624 return 0; 625 if(a->op != OREGISTER) 626 return 0; 627 if(b->op != OREGISTER) 628 return 0; 629 if(a->val.u.reg != b->val.u.reg) 630 return 0; 631 return 1; 632 } 633 634 /* 635 * generate division. 636 * generates one of: 637 * res = nl / nr 638 * res = nl % nr 639 * according to op. 640 */ 641 void 642 dodiv(int op, Node *nl, Node *nr, Node *res) 643 { 644 int a, check; 645 Node n3, n4; 646 Type *t, *t0; 647 Node ax, dx, ax1, n31, oldax, olddx; 648 Prog *p1, *p2; 649 650 // Have to be careful about handling 651 // most negative int divided by -1 correctly. 652 // The hardware will trap. 653 // Also the byte divide instruction needs AH, 654 // which we otherwise don't have to deal with. 655 // Easiest way to avoid for int8, int16: use int32. 656 // For int32 and int64, use explicit test. 657 // Could use int64 hw for int32. 658 t = nl->type; 659 t0 = t; 660 check = 0; 661 if(issigned[t->etype]) { 662 check = 1; 663 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1))) 664 check = 0; 665 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 666 check = 0; 667 } 668 if(t->width < 4) { 669 if(issigned[t->etype]) 670 t = types[TINT32]; 671 else 672 t = types[TUINT32]; 673 check = 0; 674 } 675 a = optoas(op, t); 676 677 regalloc(&n3, t0, N); 678 if(nl->ullman >= nr->ullman) { 679 savex(D_AX, &ax, &oldax, res, t0); 680 cgen(nl, &ax); 681 regalloc(&ax, t0, &ax); // mark ax live during cgen 682 cgen(nr, &n3); 683 regfree(&ax); 684 } else { 685 cgen(nr, &n3); 686 savex(D_AX, &ax, &oldax, res, t0); 687 cgen(nl, &ax); 688 } 689 if(t != t0) { 690 // Convert 691 ax1 = ax; 692 n31 = n3; 693 ax.type = t; 694 n3.type = t; 695 gmove(&ax1, &ax); 696 gmove(&n31, &n3); 697 } 698 699 p2 = P; 700 if(nacl) { 701 // Native Client does not relay the divide-by-zero trap 702 // to the executing program, so we must insert a check 703 // for ourselves. 704 nodconst(&n4, t, 0); 705 gins(optoas(OCMP, t), &n3, &n4); 706 p1 = gbranch(optoas(ONE, t), T, +1); 707 if(panicdiv == N) 708 panicdiv = sysfunc("panicdivide"); 709 ginscall(panicdiv, -1); 710 patch(p1, pc); 711 } 712 if(check) { 713 nodconst(&n4, t, -1); 714 gins(optoas(OCMP, t), &n3, &n4); 715 p1 = gbranch(optoas(ONE, t), T, +1); 716 if(op == ODIV) { 717 // a / (-1) is -a. 718 gins(optoas(OMINUS, t), N, &ax); 719 gmove(&ax, res); 720 } else { 721 // a % (-1) is 0. 722 nodconst(&n4, t, 0); 723 gmove(&n4, res); 724 } 725 p2 = gbranch(AJMP, T, 0); 726 patch(p1, pc); 727 } 728 savex(D_DX, &dx, &olddx, res, t); 729 if(!issigned[t->etype]) { 730 nodconst(&n4, t, 0); 731 gmove(&n4, &dx); 732 } else 733 gins(optoas(OEXTEND, t), N, N); 734 gins(a, &n3, N); 735 regfree(&n3); 736 if(op == ODIV) 737 gmove(&ax, res); 738 else 739 gmove(&dx, res); 740 restx(&dx, &olddx); 741 if(check) 742 patch(p2, pc); 743 restx(&ax, &oldax); 744 } 745 746 /* 747 * register dr is one of the special ones (AX, CX, DI, SI, etc.). 748 * we need to use it. if it is already allocated as a temporary 749 * (r > 1; can only happen if a routine like sgen passed a 750 * special as cgen's res and then cgen used regalloc to reuse 751 * it as its own temporary), then move it for now to another 752 * register. caller must call restx to move it back. 753 * the move is not necessary if dr == res, because res is 754 * known to be dead. 755 */ 756 void 757 savex(int dr, Node *x, Node *oldx, Node *res, Type *t) 758 { 759 int r; 760 761 r = reg[dr]; 762 763 // save current ax and dx if they are live 764 // and not the destination 765 memset(oldx, 0, sizeof *oldx); 766 nodreg(x, t, dr); 767 if(r > 1 && !samereg(x, res)) { 768 regalloc(oldx, types[TINT64], N); 769 x->type = types[TINT64]; 770 gmove(x, oldx); 771 x->type = t; 772 oldx->ostk = r; // squirrel away old r value 773 reg[dr] = 1; 774 } 775 } 776 777 void 778 restx(Node *x, Node *oldx) 779 { 780 if(oldx->op != 0) { 781 x->type = types[TINT64]; 782 reg[x->val.u.reg] = oldx->ostk; 783 gmove(oldx, x); 784 regfree(oldx); 785 } 786 } 787 788 /* 789 * generate division according to op, one of: 790 * res = nl / nr 791 * res = nl % nr 792 */ 793 void 794 cgen_div(int op, Node *nl, Node *nr, Node *res) 795 { 796 Node n1, n2, n3; 797 int w, a; 798 Magic m; 799 800 if(nr->op != OLITERAL) 801 goto longdiv; 802 w = nl->type->width*8; 803 804 // Front end handled 32-bit division. We only need to handle 64-bit. 805 // try to do division by multiply by (2^w)/d 806 // see hacker's delight chapter 10 807 switch(simtype[nl->type->etype]) { 808 default: 809 goto longdiv; 810 811 case TUINT64: 812 m.w = w; 813 m.ud = mpgetfix(nr->val.u.xval); 814 umagic(&m); 815 if(m.bad) 816 break; 817 if(op == OMOD) 818 goto longmod; 819 820 cgenr(nl, &n1, N); 821 nodconst(&n2, nl->type, m.um); 822 regalloc(&n3, nl->type, res); 823 cgen_hmul(&n1, &n2, &n3); 824 825 if(m.ua) { 826 // need to add numerator accounting for overflow 827 gins(optoas(OADD, nl->type), &n1, &n3); 828 nodconst(&n2, nl->type, 1); 829 gins(optoas(ORROTC, nl->type), &n2, &n3); 830 nodconst(&n2, nl->type, m.s-1); 831 gins(optoas(ORSH, nl->type), &n2, &n3); 832 } else { 833 nodconst(&n2, nl->type, m.s); 834 gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx 835 } 836 837 gmove(&n3, res); 838 regfree(&n1); 839 regfree(&n3); 840 return; 841 842 case TINT64: 843 m.w = w; 844 m.sd = mpgetfix(nr->val.u.xval); 845 smagic(&m); 846 if(m.bad) 847 break; 848 if(op == OMOD) 849 goto longmod; 850 851 cgenr(nl, &n1, res); 852 nodconst(&n2, nl->type, m.sm); 853 regalloc(&n3, nl->type, N); 854 cgen_hmul(&n1, &n2, &n3); 855 856 if(m.sm < 0) { 857 // need to add numerator 858 gins(optoas(OADD, nl->type), &n1, &n3); 859 } 860 861 nodconst(&n2, nl->type, m.s); 862 gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3 863 864 nodconst(&n2, nl->type, w-1); 865 gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg 866 gins(optoas(OSUB, nl->type), &n1, &n3); // added 867 868 if(m.sd < 0) { 869 // this could probably be removed 870 // by factoring it into the multiplier 871 gins(optoas(OMINUS, nl->type), N, &n3); 872 } 873 874 gmove(&n3, res); 875 regfree(&n1); 876 regfree(&n3); 877 return; 878 } 879 goto longdiv; 880 881 longdiv: 882 // division and mod using (slow) hardware instruction 883 dodiv(op, nl, nr, res); 884 return; 885 886 longmod: 887 // mod using formula A%B = A-(A/B*B) but 888 // we know that there is a fast algorithm for A/B 889 regalloc(&n1, nl->type, res); 890 cgen(nl, &n1); 891 regalloc(&n2, nl->type, N); 892 cgen_div(ODIV, &n1, nr, &n2); 893 a = optoas(OMUL, nl->type); 894 if(w == 8) { 895 // use 2-operand 16-bit multiply 896 // because there is no 2-operand 8-bit multiply 897 a = AIMULW; 898 } 899 if(!smallintconst(nr)) { 900 regalloc(&n3, nl->type, N); 901 cgen(nr, &n3); 902 gins(a, &n3, &n2); 903 regfree(&n3); 904 } else 905 gins(a, nr, &n2); 906 gins(optoas(OSUB, nl->type), &n2, &n1); 907 gmove(&n1, res); 908 regfree(&n1); 909 regfree(&n2); 910 } 911 912 /* 913 * generate high multiply: 914 * res = (nl*nr) >> width 915 */ 916 void 917 cgen_hmul(Node *nl, Node *nr, Node *res) 918 { 919 Type *t; 920 int a; 921 Node n1, n2, ax, dx, *tmp; 922 923 t = nl->type; 924 a = optoas(OHMUL, t); 925 if(nl->ullman < nr->ullman) { 926 tmp = nl; 927 nl = nr; 928 nr = tmp; 929 } 930 cgenr(nl, &n1, res); 931 cgenr(nr, &n2, N); 932 nodreg(&ax, t, D_AX); 933 gmove(&n1, &ax); 934 gins(a, &n2, N); 935 regfree(&n2); 936 regfree(&n1); 937 938 if(t->width == 1) { 939 // byte multiply behaves differently. 940 nodreg(&ax, t, D_AH); 941 nodreg(&dx, t, D_DX); 942 gmove(&ax, &dx); 943 } 944 nodreg(&dx, t, D_DX); 945 gmove(&dx, res); 946 } 947 948 /* 949 * generate shift according to op, one of: 950 * res = nl << nr 951 * res = nl >> nr 952 */ 953 void 954 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 955 { 956 Node n1, n2, n3, n4, n5, cx, oldcx; 957 int a, rcx; 958 Prog *p1; 959 uvlong sc; 960 Type *tcount; 961 962 a = optoas(op, nl->type); 963 964 if(nr->op == OLITERAL) { 965 regalloc(&n1, nl->type, res); 966 cgen(nl, &n1); 967 sc = mpgetfix(nr->val.u.xval); 968 if(sc >= nl->type->width*8) { 969 // large shift gets 2 shifts by width-1 970 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 971 gins(a, &n3, &n1); 972 gins(a, &n3, &n1); 973 } else 974 gins(a, nr, &n1); 975 gmove(&n1, res); 976 regfree(&n1); 977 goto ret; 978 } 979 980 if(nl->ullman >= UINF) { 981 tempname(&n4, nl->type); 982 cgen(nl, &n4); 983 nl = &n4; 984 } 985 if(nr->ullman >= UINF) { 986 tempname(&n5, nr->type); 987 cgen(nr, &n5); 988 nr = &n5; 989 } 990 991 rcx = reg[D_CX]; 992 nodreg(&n1, types[TUINT32], D_CX); 993 994 // Allow either uint32 or uint64 as shift type, 995 // to avoid unnecessary conversion from uint32 to uint64 996 // just to do the comparison. 997 tcount = types[simtype[nr->type->etype]]; 998 if(tcount->etype < TUINT32) 999 tcount = types[TUINT32]; 1000 1001 regalloc(&n1, nr->type, &n1); // to hold the shift type in CX 1002 regalloc(&n3, tcount, &n1); // to clear high bits of CX 1003 1004 nodreg(&cx, types[TUINT64], D_CX); 1005 memset(&oldcx, 0, sizeof oldcx); 1006 if(rcx > 0 && !samereg(&cx, res)) { 1007 regalloc(&oldcx, types[TUINT64], N); 1008 gmove(&cx, &oldcx); 1009 } 1010 cx.type = tcount; 1011 1012 if(samereg(&cx, res)) 1013 regalloc(&n2, nl->type, N); 1014 else 1015 regalloc(&n2, nl->type, res); 1016 if(nl->ullman >= nr->ullman) { 1017 cgen(nl, &n2); 1018 cgen(nr, &n1); 1019 gmove(&n1, &n3); 1020 } else { 1021 cgen(nr, &n1); 1022 gmove(&n1, &n3); 1023 cgen(nl, &n2); 1024 } 1025 regfree(&n3); 1026 1027 // test and fix up large shifts 1028 if(!bounded) { 1029 nodconst(&n3, tcount, nl->type->width*8); 1030 gins(optoas(OCMP, tcount), &n1, &n3); 1031 p1 = gbranch(optoas(OLT, tcount), T, +1); 1032 if(op == ORSH && issigned[nl->type->etype]) { 1033 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 1034 gins(a, &n3, &n2); 1035 } else { 1036 nodconst(&n3, nl->type, 0); 1037 gmove(&n3, &n2); 1038 } 1039 patch(p1, pc); 1040 } 1041 1042 gins(a, &n1, &n2); 1043 1044 if(oldcx.op != 0) { 1045 cx.type = types[TUINT64]; 1046 gmove(&oldcx, &cx); 1047 regfree(&oldcx); 1048 } 1049 1050 gmove(&n2, res); 1051 1052 regfree(&n1); 1053 regfree(&n2); 1054 1055 ret: 1056 ; 1057 } 1058 1059 /* 1060 * generate byte multiply: 1061 * res = nl * nr 1062 * there is no 2-operand byte multiply instruction so 1063 * we do a full-width multiplication and truncate afterwards. 1064 */ 1065 void 1066 cgen_bmul(int op, Node *nl, Node *nr, Node *res) 1067 { 1068 Node n1, n2, n1b, n2b, *tmp; 1069 Type *t; 1070 int a; 1071 1072 // largest ullman on left. 1073 if(nl->ullman < nr->ullman) { 1074 tmp = nl; 1075 nl = nr; 1076 nr = tmp; 1077 } 1078 1079 // generate operands in "8-bit" registers. 1080 regalloc(&n1b, nl->type, res); 1081 cgen(nl, &n1b); 1082 regalloc(&n2b, nr->type, N); 1083 cgen(nr, &n2b); 1084 1085 // perform full-width multiplication. 1086 t = types[TUINT64]; 1087 if(issigned[nl->type->etype]) 1088 t = types[TINT64]; 1089 nodreg(&n1, t, n1b.val.u.reg); 1090 nodreg(&n2, t, n2b.val.u.reg); 1091 a = optoas(op, t); 1092 gins(a, &n2, &n1); 1093 1094 // truncate. 1095 gmove(&n1, res); 1096 regfree(&n1b); 1097 regfree(&n2b); 1098 } 1099 1100 void 1101 clearfat(Node *nl) 1102 { 1103 int64 w, c, q; 1104 Node n1, oldn1, ax, oldax, di, z; 1105 Prog *p; 1106 1107 /* clear a fat object */ 1108 if(debug['g']) 1109 dump("\nclearfat", nl); 1110 1111 w = nl->type->width; 1112 // Avoid taking the address for simple enough types. 1113 if(componentgen(N, nl)) 1114 return; 1115 1116 c = w % 8; // bytes 1117 q = w / 8; // quads 1118 1119 if(q < 4) { 1120 // Write sequence of MOV 0, off(base) instead of using STOSQ. 1121 // The hope is that although the code will be slightly longer, 1122 // the MOVs will have no dependencies and pipeline better 1123 // than the unrolled STOSQ loop. 1124 // NOTE: Must use agen, not igen, so that optimizer sees address 1125 // being taken. We are not writing on field boundaries. 1126 agenr(nl, &n1, N); 1127 n1.op = OINDREG; 1128 nodconst(&z, types[TUINT64], 0); 1129 while(q-- > 0) { 1130 n1.type = z.type; 1131 gins(AMOVQ, &z, &n1); 1132 n1.xoffset += 8; 1133 } 1134 if(c >= 4) { 1135 nodconst(&z, types[TUINT32], 0); 1136 n1.type = z.type; 1137 gins(AMOVL, &z, &n1); 1138 n1.xoffset += 4; 1139 c -= 4; 1140 } 1141 nodconst(&z, types[TUINT8], 0); 1142 while(c-- > 0) { 1143 n1.type = z.type; 1144 gins(AMOVB, &z, &n1); 1145 n1.xoffset++; 1146 } 1147 regfree(&n1); 1148 return; 1149 } 1150 1151 savex(D_DI, &n1, &oldn1, N, types[tptr]); 1152 agen(nl, &n1); 1153 1154 savex(D_AX, &ax, &oldax, N, types[tptr]); 1155 gconreg(AMOVL, 0, D_AX); 1156 1157 if(q > 128 || nacl) { 1158 gconreg(movptr, q, D_CX); 1159 gins(AREP, N, N); // repeat 1160 gins(ASTOSQ, N, N); // STOQ AL,*(DI)+ 1161 } else { 1162 p = gins(ADUFFZERO, N, N); 1163 p->to.type = D_ADDR; 1164 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 1165 // 2 and 128 = magic constants: see ../../runtime/asm_amd64.s 1166 p->to.offset = 2*(128-q); 1167 } 1168 1169 z = ax; 1170 di = n1; 1171 if(w >= 8 && c >= 4) { 1172 di.op = OINDREG; 1173 di.type = z.type = types[TINT64]; 1174 p = gins(AMOVQ, &z, &di); 1175 p->to.scale = 1; 1176 p->to.offset = c-8; 1177 } else if(c >= 4) { 1178 di.op = OINDREG; 1179 di.type = z.type = types[TINT32]; 1180 p = gins(AMOVL, &z, &di); 1181 if(c > 4) { 1182 p = gins(AMOVL, &z, &di); 1183 p->to.scale = 1; 1184 p->to.offset = c-4; 1185 } 1186 } else 1187 while(c > 0) { 1188 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 1189 c--; 1190 } 1191 1192 restx(&n1, &oldn1); 1193 restx(&ax, &oldax); 1194 } 1195 1196 // Called after regopt and peep have run. 1197 // Expand CHECKNIL pseudo-op into actual nil pointer check. 1198 void 1199 expandchecks(Prog *firstp) 1200 { 1201 Prog *p, *p1, *p2; 1202 1203 for(p = firstp; p != P; p = p->link) { 1204 if(p->as != ACHECKNIL) 1205 continue; 1206 if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers 1207 warnl(p->lineno, "generated nil check"); 1208 // check is 1209 // CMP arg, $0 1210 // JNE 2(PC) (likely) 1211 // MOV AX, 0 1212 p1 = mal(sizeof *p1); 1213 p2 = mal(sizeof *p2); 1214 clearp(p1); 1215 clearp(p2); 1216 p1->link = p2; 1217 p2->link = p->link; 1218 p->link = p1; 1219 p1->lineno = p->lineno; 1220 p2->lineno = p->lineno; 1221 p1->pc = 9999; 1222 p2->pc = 9999; 1223 p->as = cmpptr; 1224 p->to.type = D_CONST; 1225 p->to.offset = 0; 1226 p1->as = AJNE; 1227 p1->from.type = D_CONST; 1228 p1->from.offset = 1; // likely 1229 p1->to.type = D_BRANCH; 1230 p1->to.u.branch = p2->link; 1231 // crash by write to memory address 0. 1232 // if possible, since we know arg is 0, use 0(arg), 1233 // which will be shorter to encode than plain 0. 1234 p2->as = AMOVL; 1235 p2->from.type = D_AX; 1236 if(regtyp(&p->from)) 1237 p2->to.type = p->from.type + D_INDIR; 1238 else 1239 p2->to.type = D_INDIR+D_NONE; 1240 p2->to.offset = 0; 1241 } 1242 }