github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/6g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 void 13 defframe(Prog *ptxt) 14 { 15 // fill in argument size 16 ptxt->to.offset = rnd(curfn->type->argwid, widthptr); 17 18 // fill in final stack size 19 ptxt->to.offset <<= 32; 20 ptxt->to.offset |= rnd(stksize+maxarg, widthptr); 21 } 22 23 // Sweep the prog list to mark any used nodes. 24 void 25 markautoused(Prog* p) 26 { 27 for (; p; p = p->link) { 28 if (p->as == ATYPE) 29 continue; 30 31 if (p->from.type == D_AUTO && p->from.node) 32 p->from.node->used = 1; 33 34 if (p->to.type == D_AUTO && p->to.node) 35 p->to.node->used = 1; 36 } 37 } 38 39 // Fixup instructions after compactframe has moved all autos around. 40 void 41 fixautoused(Prog *p) 42 { 43 Prog **lp; 44 45 for (lp=&p; (p=*lp) != P; ) { 46 if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { 47 *lp = p->link; 48 continue; 49 } 50 if (p->from.type == D_AUTO && p->from.node) 51 p->from.offset += p->from.node->stkdelta; 52 53 if (p->to.type == D_AUTO && p->to.node) 54 p->to.offset += p->to.node->stkdelta; 55 56 lp = &p->link; 57 } 58 } 59 60 61 /* 62 * generate: 63 * call f 64 * proc=-1 normal call but no return 65 * proc=0 normal call 66 * proc=1 goroutine run in new proc 67 * proc=2 defer call save away stack 68 * proc=3 normal call to C pointer (not Go func value) 69 */ 70 void 71 ginscall(Node *f, int proc) 72 { 73 Prog *p; 74 Node reg, con; 75 Node r1; 76 77 switch(proc) { 78 default: 79 fatal("ginscall: bad proc %d", proc); 80 break; 81 82 case 0: // normal call 83 case -1: // normal call but no return 84 if(f->op == ONAME && f->class == PFUNC) { 85 p = gins(ACALL, N, f); 86 afunclit(&p->to, f); 87 if(proc == -1 || noreturn(p)) 88 gins(AUNDEF, N, N); 89 break; 90 } 91 nodreg(®, types[tptr], D_DX); 92 nodreg(&r1, types[tptr], D_BX); 93 gmove(f, ®); 94 reg.op = OINDREG; 95 gmove(®, &r1); 96 reg.op = OREGISTER; 97 gins(ACALL, ®, &r1); 98 break; 99 100 case 3: // normal call of c function pointer 101 gins(ACALL, N, f); 102 break; 103 104 case 1: // call in new proc (go) 105 case 2: // deferred call (defer) 106 nodreg(®, types[TINT64], D_CX); 107 if(flag_largemodel) { 108 regalloc(&r1, f->type, f); 109 gmove(f, &r1); 110 gins(APUSHQ, &r1, N); 111 regfree(&r1); 112 } else { 113 gins(APUSHQ, f, N); 114 } 115 nodconst(&con, types[TINT32], argsize(f->type)); 116 gins(APUSHQ, &con, N); 117 if(proc == 1) 118 ginscall(newproc, 0); 119 else { 120 if(!hasdefer) 121 fatal("hasdefer=0 but has defer"); 122 ginscall(deferproc, 0); 123 } 124 gins(APOPQ, N, ®); 125 gins(APOPQ, N, ®); 126 if(proc == 2) { 127 nodreg(®, types[TINT64], D_AX); 128 gins(ATESTQ, ®, ®); 129 patch(gbranch(AJNE, T, -1), retpc); 130 } 131 break; 132 } 133 } 134 135 /* 136 * n is call to interface method. 137 * generate res = n. 138 */ 139 void 140 cgen_callinter(Node *n, Node *res, int proc) 141 { 142 Node *i, *f; 143 Node tmpi, nodi, nodo, nodr, nodsp; 144 145 i = n->left; 146 if(i->op != ODOTINTER) 147 fatal("cgen_callinter: not ODOTINTER %O", i->op); 148 149 f = i->right; // field 150 if(f->op != ONAME) 151 fatal("cgen_callinter: not ONAME %O", f->op); 152 153 i = i->left; // interface 154 155 if(!i->addable) { 156 tempname(&tmpi, i->type); 157 cgen(i, &tmpi); 158 i = &tmpi; 159 } 160 161 genlist(n->list); // assign the args 162 163 // i is now addable, prepare an indirected 164 // register to hold its address. 165 igen(i, &nodi, res); // REG = &inter 166 167 nodindreg(&nodsp, types[tptr], D_SP); 168 nodi.type = types[tptr]; 169 nodi.xoffset += widthptr; 170 cgen(&nodi, &nodsp); // 0(SP) = 8(REG) -- i.data 171 172 regalloc(&nodo, types[tptr], res); 173 nodi.type = types[tptr]; 174 nodi.xoffset -= widthptr; 175 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 176 regfree(&nodi); 177 178 regalloc(&nodr, types[tptr], &nodo); 179 if(n->left->xoffset == BADWIDTH) 180 fatal("cgen_callinter: badwidth"); 181 nodo.op = OINDREG; 182 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 183 if(proc == 0) { 184 // plain call: use direct c function pointer - more efficient 185 cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f] 186 proc = 3; 187 } else { 188 // go/defer. generate go func value. 189 gins(ALEAQ, &nodo, &nodr); // REG = &(32+offset(REG)) -- i.tab->fun[f] 190 } 191 192 // BOTCH nodr.type = fntype; 193 nodr.type = n->left->type; 194 ginscall(&nodr, proc); 195 196 regfree(&nodr); 197 regfree(&nodo); 198 199 setmaxarg(n->left->type); 200 } 201 202 /* 203 * generate function call; 204 * proc=0 normal call 205 * proc=1 goroutine run in new proc 206 * proc=2 defer call save away stack 207 */ 208 void 209 cgen_call(Node *n, int proc) 210 { 211 Type *t; 212 Node nod, afun; 213 214 if(n == N) 215 return; 216 217 if(n->left->ullman >= UINF) { 218 // if name involves a fn call 219 // precompute the address of the fn 220 tempname(&afun, types[tptr]); 221 cgen(n->left, &afun); 222 } 223 224 genlist(n->list); // assign the args 225 t = n->left->type; 226 227 setmaxarg(t); 228 229 // call tempname pointer 230 if(n->left->ullman >= UINF) { 231 regalloc(&nod, types[tptr], N); 232 cgen_as(&nod, &afun); 233 nod.type = t; 234 ginscall(&nod, proc); 235 regfree(&nod); 236 return; 237 } 238 239 // call pointer 240 if(n->left->op != ONAME || n->left->class != PFUNC) { 241 regalloc(&nod, types[tptr], N); 242 cgen_as(&nod, n->left); 243 nod.type = t; 244 ginscall(&nod, proc); 245 regfree(&nod); 246 return; 247 } 248 249 // call direct 250 n->left->method = 1; 251 ginscall(n->left, proc); 252 } 253 254 /* 255 * call to n has already been generated. 256 * generate: 257 * res = return value from call. 258 */ 259 void 260 cgen_callret(Node *n, Node *res) 261 { 262 Node nod; 263 Type *fp, *t; 264 Iter flist; 265 266 t = n->left->type; 267 if(t->etype == TPTR32 || t->etype == TPTR64) 268 t = t->type; 269 270 fp = structfirst(&flist, getoutarg(t)); 271 if(fp == T) 272 fatal("cgen_callret: nil"); 273 274 memset(&nod, 0, sizeof(nod)); 275 nod.op = OINDREG; 276 nod.val.u.reg = D_SP; 277 nod.addable = 1; 278 279 nod.xoffset = fp->width; 280 nod.type = fp->type; 281 cgen_as(res, &nod); 282 } 283 284 /* 285 * call to n has already been generated. 286 * generate: 287 * res = &return value from call. 288 */ 289 void 290 cgen_aret(Node *n, Node *res) 291 { 292 Node nod1, nod2; 293 Type *fp, *t; 294 Iter flist; 295 296 t = n->left->type; 297 if(isptr[t->etype]) 298 t = t->type; 299 300 fp = structfirst(&flist, getoutarg(t)); 301 if(fp == T) 302 fatal("cgen_aret: nil"); 303 304 memset(&nod1, 0, sizeof(nod1)); 305 nod1.op = OINDREG; 306 nod1.val.u.reg = D_SP; 307 nod1.addable = 1; 308 309 nod1.xoffset = fp->width; 310 nod1.type = fp->type; 311 312 if(res->op != OREGISTER) { 313 regalloc(&nod2, types[tptr], res); 314 gins(ALEAQ, &nod1, &nod2); 315 gins(AMOVQ, &nod2, res); 316 regfree(&nod2); 317 } else 318 gins(ALEAQ, &nod1, res); 319 } 320 321 /* 322 * generate return. 323 * n->left is assignments to return values. 324 */ 325 void 326 cgen_ret(Node *n) 327 { 328 genlist(n->list); // copy out args 329 if(hasdefer || curfn->exit) 330 gjmp(retpc); 331 else 332 gins(ARET, N, N); 333 } 334 335 /* 336 * generate += *= etc. 337 */ 338 void 339 cgen_asop(Node *n) 340 { 341 Node n1, n2, n3, n4; 342 Node *nl, *nr; 343 Prog *p1; 344 Addr addr; 345 int a; 346 347 nl = n->left; 348 nr = n->right; 349 350 if(nr->ullman >= UINF && nl->ullman >= UINF) { 351 tempname(&n1, nr->type); 352 cgen(nr, &n1); 353 n2 = *n; 354 n2.right = &n1; 355 cgen_asop(&n2); 356 goto ret; 357 } 358 359 if(!isint[nl->type->etype]) 360 goto hard; 361 if(!isint[nr->type->etype]) 362 goto hard; 363 364 switch(n->etype) { 365 case OADD: 366 if(smallintconst(nr)) 367 if(mpgetfix(nr->val.u.xval) == 1) { 368 a = optoas(OINC, nl->type); 369 if(nl->addable) { 370 gins(a, N, nl); 371 goto ret; 372 } 373 if(sudoaddable(a, nl, &addr)) { 374 p1 = gins(a, N, N); 375 p1->to = addr; 376 sudoclean(); 377 goto ret; 378 } 379 } 380 break; 381 382 case OSUB: 383 if(smallintconst(nr)) 384 if(mpgetfix(nr->val.u.xval) == 1) { 385 a = optoas(ODEC, nl->type); 386 if(nl->addable) { 387 gins(a, N, nl); 388 goto ret; 389 } 390 if(sudoaddable(a, nl, &addr)) { 391 p1 = gins(a, N, N); 392 p1->to = addr; 393 sudoclean(); 394 goto ret; 395 } 396 } 397 break; 398 } 399 400 switch(n->etype) { 401 case OADD: 402 case OSUB: 403 case OXOR: 404 case OAND: 405 case OOR: 406 a = optoas(n->etype, nl->type); 407 if(nl->addable) { 408 if(smallintconst(nr)) { 409 gins(a, nr, nl); 410 goto ret; 411 } 412 regalloc(&n2, nr->type, N); 413 cgen(nr, &n2); 414 gins(a, &n2, nl); 415 regfree(&n2); 416 goto ret; 417 } 418 if(nr->ullman < UINF) 419 if(sudoaddable(a, nl, &addr)) { 420 if(smallintconst(nr)) { 421 p1 = gins(a, nr, N); 422 p1->to = addr; 423 sudoclean(); 424 goto ret; 425 } 426 regalloc(&n2, nr->type, N); 427 cgen(nr, &n2); 428 p1 = gins(a, &n2, N); 429 p1->to = addr; 430 regfree(&n2); 431 sudoclean(); 432 goto ret; 433 } 434 } 435 436 hard: 437 n2.op = 0; 438 n1.op = 0; 439 if(nr->op == OLITERAL) { 440 // don't allocate a register for literals. 441 } else if(nr->ullman >= nl->ullman || nl->addable) { 442 regalloc(&n2, nr->type, N); 443 cgen(nr, &n2); 444 nr = &n2; 445 } else { 446 tempname(&n2, nr->type); 447 cgen(nr, &n2); 448 nr = &n2; 449 } 450 if(!nl->addable) { 451 igen(nl, &n1, N); 452 nl = &n1; 453 } 454 455 n3 = *n; 456 n3.left = nl; 457 n3.right = nr; 458 n3.op = n->etype; 459 460 regalloc(&n4, nl->type, N); 461 cgen(&n3, &n4); 462 gmove(&n4, nl); 463 464 if(n1.op) 465 regfree(&n1); 466 if(n2.op == OREGISTER) 467 regfree(&n2); 468 regfree(&n4); 469 470 ret: 471 ; 472 } 473 474 int 475 samereg(Node *a, Node *b) 476 { 477 if(a == N || b == N) 478 return 0; 479 if(a->op != OREGISTER) 480 return 0; 481 if(b->op != OREGISTER) 482 return 0; 483 if(a->val.u.reg != b->val.u.reg) 484 return 0; 485 return 1; 486 } 487 488 /* 489 * generate division. 490 * generates one of: 491 * res = nl / nr 492 * res = nl % nr 493 * according to op. 494 */ 495 void 496 dodiv(int op, Node *nl, Node *nr, Node *res) 497 { 498 int a, check; 499 Node n3, n4; 500 Type *t, *t0; 501 Node ax, dx, ax1, n31, oldax, olddx; 502 Prog *p1, *p2; 503 504 // Have to be careful about handling 505 // most negative int divided by -1 correctly. 506 // The hardware will trap. 507 // Also the byte divide instruction needs AH, 508 // which we otherwise don't have to deal with. 509 // Easiest way to avoid for int8, int16: use int32. 510 // For int32 and int64, use explicit test. 511 // Could use int64 hw for int32. 512 t = nl->type; 513 t0 = t; 514 check = 0; 515 if(issigned[t->etype]) { 516 check = 1; 517 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1)) 518 check = 0; 519 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 520 check = 0; 521 } 522 if(t->width < 4) { 523 if(issigned[t->etype]) 524 t = types[TINT32]; 525 else 526 t = types[TUINT32]; 527 check = 0; 528 } 529 a = optoas(op, t); 530 531 regalloc(&n3, t0, N); 532 if(nl->ullman >= nr->ullman) { 533 savex(D_AX, &ax, &oldax, res, t0); 534 cgen(nl, &ax); 535 regalloc(&ax, t0, &ax); // mark ax live during cgen 536 cgen(nr, &n3); 537 regfree(&ax); 538 } else { 539 cgen(nr, &n3); 540 savex(D_AX, &ax, &oldax, res, t0); 541 cgen(nl, &ax); 542 } 543 if(t != t0) { 544 // Convert 545 ax1 = ax; 546 n31 = n3; 547 ax.type = t; 548 n3.type = t; 549 gmove(&ax1, &ax); 550 gmove(&n31, &n3); 551 } 552 553 p2 = P; 554 if(check) { 555 nodconst(&n4, t, -1); 556 gins(optoas(OCMP, t), &n3, &n4); 557 p1 = gbranch(optoas(ONE, t), T, +1); 558 if(op == ODIV) { 559 // a / (-1) is -a. 560 gins(optoas(OMINUS, t), N, &ax); 561 gmove(&ax, res); 562 } else { 563 // a % (-1) is 0. 564 nodconst(&n4, t, 0); 565 gmove(&n4, res); 566 } 567 p2 = gbranch(AJMP, T, 0); 568 patch(p1, pc); 569 } 570 savex(D_DX, &dx, &olddx, res, t); 571 if(!issigned[t->etype]) { 572 nodconst(&n4, t, 0); 573 gmove(&n4, &dx); 574 } else 575 gins(optoas(OEXTEND, t), N, N); 576 gins(a, &n3, N); 577 regfree(&n3); 578 if(op == ODIV) 579 gmove(&ax, res); 580 else 581 gmove(&dx, res); 582 restx(&dx, &olddx); 583 if(check) 584 patch(p2, pc); 585 restx(&ax, &oldax); 586 } 587 588 /* 589 * register dr is one of the special ones (AX, CX, DI, SI, etc.). 590 * we need to use it. if it is already allocated as a temporary 591 * (r > 1; can only happen if a routine like sgen passed a 592 * special as cgen's res and then cgen used regalloc to reuse 593 * it as its own temporary), then move it for now to another 594 * register. caller must call restx to move it back. 595 * the move is not necessary if dr == res, because res is 596 * known to be dead. 597 */ 598 void 599 savex(int dr, Node *x, Node *oldx, Node *res, Type *t) 600 { 601 int r; 602 603 r = reg[dr]; 604 605 // save current ax and dx if they are live 606 // and not the destination 607 memset(oldx, 0, sizeof *oldx); 608 nodreg(x, t, dr); 609 if(r > 1 && !samereg(x, res)) { 610 regalloc(oldx, types[TINT64], N); 611 x->type = types[TINT64]; 612 gmove(x, oldx); 613 x->type = t; 614 oldx->ostk = r; // squirrel away old r value 615 reg[dr] = 1; 616 } 617 } 618 619 void 620 restx(Node *x, Node *oldx) 621 { 622 if(oldx->op != 0) { 623 x->type = types[TINT64]; 624 reg[x->val.u.reg] = oldx->ostk; 625 gmove(oldx, x); 626 regfree(oldx); 627 } 628 } 629 630 /* 631 * generate division according to op, one of: 632 * res = nl / nr 633 * res = nl % nr 634 */ 635 void 636 cgen_div(int op, Node *nl, Node *nr, Node *res) 637 { 638 Node n1, n2, n3; 639 int w, a; 640 Magic m; 641 642 if(nr->op != OLITERAL) 643 goto longdiv; 644 w = nl->type->width*8; 645 646 // Front end handled 32-bit division. We only need to handle 64-bit. 647 // try to do division by multiply by (2^w)/d 648 // see hacker's delight chapter 10 649 switch(simtype[nl->type->etype]) { 650 default: 651 goto longdiv; 652 653 case TUINT64: 654 m.w = w; 655 m.ud = mpgetfix(nr->val.u.xval); 656 umagic(&m); 657 if(m.bad) 658 break; 659 if(op == OMOD) 660 goto longmod; 661 662 cgenr(nl, &n1, N); 663 nodconst(&n2, nl->type, m.um); 664 regalloc(&n3, nl->type, res); 665 cgen_hmul(&n1, &n2, &n3); 666 667 if(m.ua) { 668 // need to add numerator accounting for overflow 669 gins(optoas(OADD, nl->type), &n1, &n3); 670 nodconst(&n2, nl->type, 1); 671 gins(optoas(ORROTC, nl->type), &n2, &n3); 672 nodconst(&n2, nl->type, m.s-1); 673 gins(optoas(ORSH, nl->type), &n2, &n3); 674 } else { 675 nodconst(&n2, nl->type, m.s); 676 gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx 677 } 678 679 gmove(&n3, res); 680 regfree(&n1); 681 regfree(&n3); 682 return; 683 684 case TINT64: 685 m.w = w; 686 m.sd = mpgetfix(nr->val.u.xval); 687 smagic(&m); 688 if(m.bad) 689 break; 690 if(op == OMOD) 691 goto longmod; 692 693 cgenr(nl, &n1, res); 694 nodconst(&n2, nl->type, m.sm); 695 regalloc(&n3, nl->type, N); 696 cgen_hmul(&n1, &n2, &n3); 697 698 if(m.sm < 0) { 699 // need to add numerator 700 gins(optoas(OADD, nl->type), &n1, &n3); 701 } 702 703 nodconst(&n2, nl->type, m.s); 704 gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3 705 706 nodconst(&n2, nl->type, w-1); 707 gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg 708 gins(optoas(OSUB, nl->type), &n1, &n3); // added 709 710 if(m.sd < 0) { 711 // this could probably be removed 712 // by factoring it into the multiplier 713 gins(optoas(OMINUS, nl->type), N, &n3); 714 } 715 716 gmove(&n3, res); 717 regfree(&n1); 718 regfree(&n3); 719 return; 720 } 721 goto longdiv; 722 723 longdiv: 724 // division and mod using (slow) hardware instruction 725 dodiv(op, nl, nr, res); 726 return; 727 728 longmod: 729 // mod using formula A%B = A-(A/B*B) but 730 // we know that there is a fast algorithm for A/B 731 regalloc(&n1, nl->type, res); 732 cgen(nl, &n1); 733 regalloc(&n2, nl->type, N); 734 cgen_div(ODIV, &n1, nr, &n2); 735 a = optoas(OMUL, nl->type); 736 if(w == 8) { 737 // use 2-operand 16-bit multiply 738 // because there is no 2-operand 8-bit multiply 739 a = AIMULW; 740 } 741 if(!smallintconst(nr)) { 742 regalloc(&n3, nl->type, N); 743 cgen(nr, &n3); 744 gins(a, &n3, &n2); 745 regfree(&n3); 746 } else 747 gins(a, nr, &n2); 748 gins(optoas(OSUB, nl->type), &n2, &n1); 749 gmove(&n1, res); 750 regfree(&n1); 751 regfree(&n2); 752 } 753 754 /* 755 * generate high multiply: 756 * res = (nl*nr) >> width 757 */ 758 void 759 cgen_hmul(Node *nl, Node *nr, Node *res) 760 { 761 Type *t; 762 int a; 763 Node n1, n2, ax, dx, *tmp; 764 765 t = nl->type; 766 a = optoas(OHMUL, t); 767 if(nl->ullman < nr->ullman) { 768 tmp = nl; 769 nl = nr; 770 nr = tmp; 771 } 772 cgenr(nl, &n1, res); 773 cgenr(nr, &n2, N); 774 nodreg(&ax, t, D_AX); 775 gmove(&n1, &ax); 776 gins(a, &n2, N); 777 regfree(&n2); 778 regfree(&n1); 779 780 if(t->width == 1) { 781 // byte multiply behaves differently. 782 nodreg(&ax, t, D_AH); 783 nodreg(&dx, t, D_DL); 784 gmove(&ax, &dx); 785 } 786 nodreg(&dx, t, D_DX); 787 gmove(&dx, res); 788 } 789 790 /* 791 * generate shift according to op, one of: 792 * res = nl << nr 793 * res = nl >> nr 794 */ 795 void 796 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 797 { 798 Node n1, n2, n3, n4, n5, cx, oldcx; 799 int a, rcx; 800 Prog *p1; 801 uvlong sc; 802 Type *tcount; 803 804 a = optoas(op, nl->type); 805 806 if(nr->op == OLITERAL) { 807 regalloc(&n1, nl->type, res); 808 cgen(nl, &n1); 809 sc = mpgetfix(nr->val.u.xval); 810 if(sc >= nl->type->width*8) { 811 // large shift gets 2 shifts by width-1 812 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 813 gins(a, &n3, &n1); 814 gins(a, &n3, &n1); 815 } else 816 gins(a, nr, &n1); 817 gmove(&n1, res); 818 regfree(&n1); 819 goto ret; 820 } 821 822 if(nl->ullman >= UINF) { 823 tempname(&n4, nl->type); 824 cgen(nl, &n4); 825 nl = &n4; 826 } 827 if(nr->ullman >= UINF) { 828 tempname(&n5, nr->type); 829 cgen(nr, &n5); 830 nr = &n5; 831 } 832 833 rcx = reg[D_CX]; 834 nodreg(&n1, types[TUINT32], D_CX); 835 836 // Allow either uint32 or uint64 as shift type, 837 // to avoid unnecessary conversion from uint32 to uint64 838 // just to do the comparison. 839 tcount = types[simtype[nr->type->etype]]; 840 if(tcount->etype < TUINT32) 841 tcount = types[TUINT32]; 842 843 regalloc(&n1, nr->type, &n1); // to hold the shift type in CX 844 regalloc(&n3, tcount, &n1); // to clear high bits of CX 845 846 nodreg(&cx, types[TUINT64], D_CX); 847 memset(&oldcx, 0, sizeof oldcx); 848 if(rcx > 0 && !samereg(&cx, res)) { 849 regalloc(&oldcx, types[TUINT64], N); 850 gmove(&cx, &oldcx); 851 } 852 cx.type = tcount; 853 854 if(samereg(&cx, res)) 855 regalloc(&n2, nl->type, N); 856 else 857 regalloc(&n2, nl->type, res); 858 if(nl->ullman >= nr->ullman) { 859 cgen(nl, &n2); 860 cgen(nr, &n1); 861 gmove(&n1, &n3); 862 } else { 863 cgen(nr, &n1); 864 gmove(&n1, &n3); 865 cgen(nl, &n2); 866 } 867 regfree(&n3); 868 869 // test and fix up large shifts 870 if(!bounded) { 871 nodconst(&n3, tcount, nl->type->width*8); 872 gins(optoas(OCMP, tcount), &n1, &n3); 873 p1 = gbranch(optoas(OLT, tcount), T, +1); 874 if(op == ORSH && issigned[nl->type->etype]) { 875 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 876 gins(a, &n3, &n2); 877 } else { 878 nodconst(&n3, nl->type, 0); 879 gmove(&n3, &n2); 880 } 881 patch(p1, pc); 882 } 883 884 gins(a, &n1, &n2); 885 886 if(oldcx.op != 0) { 887 cx.type = types[TUINT64]; 888 gmove(&oldcx, &cx); 889 regfree(&oldcx); 890 } 891 892 gmove(&n2, res); 893 894 regfree(&n1); 895 regfree(&n2); 896 897 ret: 898 ; 899 } 900 901 /* 902 * generate byte multiply: 903 * res = nl * nr 904 * there is no 2-operand byte multiply instruction so 905 * we do a full-width multiplication and truncate afterwards. 906 */ 907 void 908 cgen_bmul(int op, Node *nl, Node *nr, Node *res) 909 { 910 Node n1, n2, n1b, n2b, *tmp; 911 Type *t; 912 int a; 913 914 // largest ullman on left. 915 if(nl->ullman < nr->ullman) { 916 tmp = nl; 917 nl = nr; 918 nr = tmp; 919 } 920 921 // generate operands in "8-bit" registers. 922 regalloc(&n1b, nl->type, res); 923 cgen(nl, &n1b); 924 regalloc(&n2b, nr->type, N); 925 cgen(nr, &n2b); 926 927 // perform full-width multiplication. 928 t = types[TUINT64]; 929 if(issigned[nl->type->etype]) 930 t = types[TINT64]; 931 nodreg(&n1, t, n1b.val.u.reg); 932 nodreg(&n2, t, n2b.val.u.reg); 933 a = optoas(op, t); 934 gins(a, &n2, &n1); 935 936 // truncate. 937 gmove(&n1, res); 938 regfree(&n1b); 939 regfree(&n2b); 940 } 941 942 void 943 clearfat(Node *nl) 944 { 945 int64 w, c, q; 946 Node n1, oldn1, ax, oldax; 947 948 /* clear a fat object */ 949 if(debug['g']) 950 dump("\nclearfat", nl); 951 952 953 w = nl->type->width; 954 // Avoid taking the address for simple enough types. 955 if(componentgen(N, nl)) 956 return; 957 958 c = w % 8; // bytes 959 q = w / 8; // quads 960 961 savex(D_DI, &n1, &oldn1, N, types[tptr]); 962 agen(nl, &n1); 963 964 savex(D_AX, &ax, &oldax, N, types[tptr]); 965 gconreg(AMOVQ, 0, D_AX); 966 967 if(q >= 4) { 968 gconreg(AMOVQ, q, D_CX); 969 gins(AREP, N, N); // repeat 970 gins(ASTOSQ, N, N); // STOQ AL,*(DI)+ 971 } else 972 while(q > 0) { 973 gins(ASTOSQ, N, N); // STOQ AL,*(DI)+ 974 q--; 975 } 976 977 if(c >= 4) { 978 gconreg(AMOVQ, c, D_CX); 979 gins(AREP, N, N); // repeat 980 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 981 } else 982 while(c > 0) { 983 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 984 c--; 985 } 986 987 restx(&n1, &oldn1); 988 restx(&ax, &oldax); 989 }