github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/cmd/6g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 static Prog* appendp(Prog*, int, int, vlong, int, vlong); 13 14 void 15 defframe(Prog *ptxt, Bvec *bv) 16 { 17 int i, j; 18 uint32 frame; 19 Prog *p; 20 21 // fill in argument size 22 ptxt->to.offset = rnd(curfn->type->argwid, widthptr); 23 24 // fill in final stack size 25 ptxt->to.offset <<= 32; 26 frame = rnd(stksize+maxarg, widthptr); 27 ptxt->to.offset |= frame; 28 29 // insert code to clear pointered part of the frame, 30 // so that garbage collector only sees initialized values 31 // when it looks for pointers. 32 p = ptxt; 33 if(stkzerosize >= 8*widthptr) { 34 p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0); 35 p = appendp(p, AMOVQ, D_CONST, stkzerosize/widthptr, D_CX, 0); 36 p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0); 37 p = appendp(p, AREP, D_NONE, 0, D_NONE, 0); 38 appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0); 39 } else { 40 for(i=0, j=(stkptrsize-stkzerosize)/widthptr*2; i<stkzerosize; i+=widthptr, j+=2) 41 if(bvget(bv, j) || bvget(bv, j+1)) 42 p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i); 43 } 44 } 45 46 static Prog* 47 appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset) 48 { 49 Prog *q; 50 51 q = mal(sizeof(*q)); 52 clearp(q); 53 q->as = as; 54 q->lineno = p->lineno; 55 q->from.type = ftype; 56 q->from.offset = foffset; 57 q->to.type = ttype; 58 q->to.offset = toffset; 59 q->link = p->link; 60 p->link = q; 61 return q; 62 } 63 64 // Sweep the prog list to mark any used nodes. 65 void 66 markautoused(Prog* p) 67 { 68 for (; p; p = p->link) { 69 if (p->as == ATYPE) 70 continue; 71 72 if (p->from.type == D_AUTO && p->from.node) 73 p->from.node->used = 1; 74 75 if (p->to.type == D_AUTO && p->to.node) 76 p->to.node->used = 1; 77 } 78 } 79 80 // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 81 void 82 fixautoused(Prog *p) 83 { 84 Prog **lp; 85 86 for (lp=&p; (p=*lp) != P; ) { 87 if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { 88 *lp = p->link; 89 continue; 90 } 91 if (p->from.type == D_AUTO && p->from.node) 92 p->from.offset += p->from.node->stkdelta; 93 94 if (p->to.type == D_AUTO && p->to.node) 95 p->to.offset += p->to.node->stkdelta; 96 97 lp = &p->link; 98 } 99 } 100 101 102 /* 103 * generate: 104 * call f 105 * proc=-1 normal call but no return 106 * proc=0 normal call 107 * proc=1 goroutine run in new proc 108 * proc=2 defer call save away stack 109 * proc=3 normal call to C pointer (not Go func value) 110 */ 111 void 112 ginscall(Node *f, int proc) 113 { 114 int32 arg; 115 Prog *p; 116 Node reg, con; 117 Node r1; 118 119 if(f->type != T) 120 setmaxarg(f->type); 121 122 arg = -1; 123 // Most functions have a fixed-size argument block, so traceback uses that during unwind. 124 // Not all, though: there are some variadic functions in package runtime, 125 // and for those we emit call-specific metadata recorded by caller. 126 // Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub), 127 // so we do this for all indirect calls as well. 128 if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) { 129 arg = f->type->argwid; 130 if(proc == 1 || proc == 2) 131 arg += 2*widthptr; 132 } 133 134 if(arg != -1) 135 gargsize(arg); 136 137 switch(proc) { 138 default: 139 fatal("ginscall: bad proc %d", proc); 140 break; 141 142 case 0: // normal call 143 case -1: // normal call but no return 144 if(f->op == ONAME && f->class == PFUNC) { 145 if(f == deferreturn) { 146 // Deferred calls will appear to be returning to 147 // the CALL deferreturn(SB) that we are about to emit. 148 // However, the stack trace code will show the line 149 // of the instruction byte before the return PC. 150 // To avoid that being an unrelated instruction, 151 // insert an x86 NOP that we will have the right line number. 152 // x86 NOP 0x90 is really XCHG AX, AX; use that description 153 // because the NOP pseudo-instruction would be removed by 154 // the linker. 155 nodreg(®, types[TINT], D_AX); 156 gins(AXCHGL, ®, ®); 157 } 158 p = gins(ACALL, N, f); 159 afunclit(&p->to, f); 160 if(proc == -1 || noreturn(p)) 161 gins(AUNDEF, N, N); 162 break; 163 } 164 nodreg(®, types[tptr], D_DX); 165 nodreg(&r1, types[tptr], D_BX); 166 gmove(f, ®); 167 reg.op = OINDREG; 168 gmove(®, &r1); 169 reg.op = OREGISTER; 170 gins(ACALL, ®, &r1); 171 break; 172 173 case 3: // normal call of c function pointer 174 gins(ACALL, N, f); 175 break; 176 177 case 1: // call in new proc (go) 178 case 2: // deferred call (defer) 179 nodreg(®, types[TINT64], D_CX); 180 if(flag_largemodel) { 181 regalloc(&r1, f->type, f); 182 gmove(f, &r1); 183 gins(APUSHQ, &r1, N); 184 regfree(&r1); 185 } else { 186 gins(APUSHQ, f, N); 187 } 188 nodconst(&con, types[TINT32], argsize(f->type)); 189 gins(APUSHQ, &con, N); 190 if(proc == 1) 191 ginscall(newproc, 0); 192 else { 193 if(!hasdefer) 194 fatal("hasdefer=0 but has defer"); 195 ginscall(deferproc, 0); 196 } 197 gins(APOPQ, N, ®); 198 gins(APOPQ, N, ®); 199 if(proc == 2) { 200 nodreg(®, types[TINT64], D_AX); 201 gins(ATESTQ, ®, ®); 202 patch(gbranch(AJNE, T, -1), retpc); 203 } 204 break; 205 } 206 207 if(arg != -1) 208 gargsize(-1); 209 } 210 211 /* 212 * n is call to interface method. 213 * generate res = n. 214 */ 215 void 216 cgen_callinter(Node *n, Node *res, int proc) 217 { 218 Node *i, *f; 219 Node tmpi, nodi, nodo, nodr, nodsp; 220 221 i = n->left; 222 if(i->op != ODOTINTER) 223 fatal("cgen_callinter: not ODOTINTER %O", i->op); 224 225 f = i->right; // field 226 if(f->op != ONAME) 227 fatal("cgen_callinter: not ONAME %O", f->op); 228 229 i = i->left; // interface 230 231 if(!i->addable) { 232 tempname(&tmpi, i->type); 233 cgen(i, &tmpi); 234 i = &tmpi; 235 } 236 237 genlist(n->list); // assign the args 238 239 // i is now addable, prepare an indirected 240 // register to hold its address. 241 igen(i, &nodi, res); // REG = &inter 242 243 nodindreg(&nodsp, types[tptr], D_SP); 244 nodi.type = types[tptr]; 245 nodi.xoffset += widthptr; 246 cgen(&nodi, &nodsp); // 0(SP) = 8(REG) -- i.data 247 248 regalloc(&nodo, types[tptr], res); 249 nodi.type = types[tptr]; 250 nodi.xoffset -= widthptr; 251 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 252 regfree(&nodi); 253 254 regalloc(&nodr, types[tptr], &nodo); 255 if(n->left->xoffset == BADWIDTH) 256 fatal("cgen_callinter: badwidth"); 257 cgen_checknil(&nodo); // in case offset is huge 258 nodo.op = OINDREG; 259 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 260 if(proc == 0) { 261 // plain call: use direct c function pointer - more efficient 262 cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f] 263 proc = 3; 264 } else { 265 // go/defer. generate go func value. 266 gins(ALEAQ, &nodo, &nodr); // REG = &(32+offset(REG)) -- i.tab->fun[f] 267 } 268 269 nodr.type = n->left->type; 270 ginscall(&nodr, proc); 271 272 regfree(&nodr); 273 regfree(&nodo); 274 } 275 276 /* 277 * generate function call; 278 * proc=0 normal call 279 * proc=1 goroutine run in new proc 280 * proc=2 defer call save away stack 281 */ 282 void 283 cgen_call(Node *n, int proc) 284 { 285 Type *t; 286 Node nod, afun; 287 288 if(n == N) 289 return; 290 291 if(n->left->ullman >= UINF) { 292 // if name involves a fn call 293 // precompute the address of the fn 294 tempname(&afun, types[tptr]); 295 cgen(n->left, &afun); 296 } 297 298 genlist(n->list); // assign the args 299 t = n->left->type; 300 301 // call tempname pointer 302 if(n->left->ullman >= UINF) { 303 regalloc(&nod, types[tptr], N); 304 cgen_as(&nod, &afun); 305 nod.type = t; 306 ginscall(&nod, proc); 307 regfree(&nod); 308 return; 309 } 310 311 // call pointer 312 if(n->left->op != ONAME || n->left->class != PFUNC) { 313 regalloc(&nod, types[tptr], N); 314 cgen_as(&nod, n->left); 315 nod.type = t; 316 ginscall(&nod, proc); 317 regfree(&nod); 318 return; 319 } 320 321 // call direct 322 n->left->method = 1; 323 ginscall(n->left, proc); 324 } 325 326 /* 327 * call to n has already been generated. 328 * generate: 329 * res = return value from call. 330 */ 331 void 332 cgen_callret(Node *n, Node *res) 333 { 334 Node nod; 335 Type *fp, *t; 336 Iter flist; 337 338 t = n->left->type; 339 if(t->etype == TPTR32 || t->etype == TPTR64) 340 t = t->type; 341 342 fp = structfirst(&flist, getoutarg(t)); 343 if(fp == T) 344 fatal("cgen_callret: nil"); 345 346 memset(&nod, 0, sizeof(nod)); 347 nod.op = OINDREG; 348 nod.val.u.reg = D_SP; 349 nod.addable = 1; 350 351 nod.xoffset = fp->width; 352 nod.type = fp->type; 353 cgen_as(res, &nod); 354 } 355 356 /* 357 * call to n has already been generated. 358 * generate: 359 * res = &return value from call. 360 */ 361 void 362 cgen_aret(Node *n, Node *res) 363 { 364 Node nod1, nod2; 365 Type *fp, *t; 366 Iter flist; 367 368 t = n->left->type; 369 if(isptr[t->etype]) 370 t = t->type; 371 372 fp = structfirst(&flist, getoutarg(t)); 373 if(fp == T) 374 fatal("cgen_aret: nil"); 375 376 memset(&nod1, 0, sizeof(nod1)); 377 nod1.op = OINDREG; 378 nod1.val.u.reg = D_SP; 379 nod1.addable = 1; 380 381 nod1.xoffset = fp->width; 382 nod1.type = fp->type; 383 384 if(res->op != OREGISTER) { 385 regalloc(&nod2, types[tptr], res); 386 gins(ALEAQ, &nod1, &nod2); 387 gins(AMOVQ, &nod2, res); 388 regfree(&nod2); 389 } else 390 gins(ALEAQ, &nod1, res); 391 } 392 393 /* 394 * generate return. 395 * n->left is assignments to return values. 396 */ 397 void 398 cgen_ret(Node *n) 399 { 400 Prog *p; 401 402 genlist(n->list); // copy out args 403 if(hasdefer || curfn->exit) { 404 gjmp(retpc); 405 return; 406 } 407 p = gins(ARET, N, N); 408 if(n->op == ORETJMP) { 409 p->to.type = D_EXTERN; 410 p->to.sym = n->left->sym; 411 } 412 } 413 414 /* 415 * generate += *= etc. 416 */ 417 void 418 cgen_asop(Node *n) 419 { 420 Node n1, n2, n3, n4; 421 Node *nl, *nr; 422 Prog *p1; 423 Addr addr; 424 int a; 425 426 nl = n->left; 427 nr = n->right; 428 429 if(nr->ullman >= UINF && nl->ullman >= UINF) { 430 tempname(&n1, nr->type); 431 cgen(nr, &n1); 432 n2 = *n; 433 n2.right = &n1; 434 cgen_asop(&n2); 435 goto ret; 436 } 437 438 if(!isint[nl->type->etype]) 439 goto hard; 440 if(!isint[nr->type->etype]) 441 goto hard; 442 443 switch(n->etype) { 444 case OADD: 445 if(smallintconst(nr)) 446 if(mpgetfix(nr->val.u.xval) == 1) { 447 a = optoas(OINC, nl->type); 448 if(nl->addable) { 449 gins(a, N, nl); 450 goto ret; 451 } 452 if(sudoaddable(a, nl, &addr)) { 453 p1 = gins(a, N, N); 454 p1->to = addr; 455 sudoclean(); 456 goto ret; 457 } 458 } 459 break; 460 461 case OSUB: 462 if(smallintconst(nr)) 463 if(mpgetfix(nr->val.u.xval) == 1) { 464 a = optoas(ODEC, nl->type); 465 if(nl->addable) { 466 gins(a, N, nl); 467 goto ret; 468 } 469 if(sudoaddable(a, nl, &addr)) { 470 p1 = gins(a, N, N); 471 p1->to = addr; 472 sudoclean(); 473 goto ret; 474 } 475 } 476 break; 477 } 478 479 switch(n->etype) { 480 case OADD: 481 case OSUB: 482 case OXOR: 483 case OAND: 484 case OOR: 485 a = optoas(n->etype, nl->type); 486 if(nl->addable) { 487 if(smallintconst(nr)) { 488 gins(a, nr, nl); 489 goto ret; 490 } 491 regalloc(&n2, nr->type, N); 492 cgen(nr, &n2); 493 gins(a, &n2, nl); 494 regfree(&n2); 495 goto ret; 496 } 497 if(nr->ullman < UINF) 498 if(sudoaddable(a, nl, &addr)) { 499 if(smallintconst(nr)) { 500 p1 = gins(a, nr, N); 501 p1->to = addr; 502 sudoclean(); 503 goto ret; 504 } 505 regalloc(&n2, nr->type, N); 506 cgen(nr, &n2); 507 p1 = gins(a, &n2, N); 508 p1->to = addr; 509 regfree(&n2); 510 sudoclean(); 511 goto ret; 512 } 513 } 514 515 hard: 516 n2.op = 0; 517 n1.op = 0; 518 if(nr->op == OLITERAL) { 519 // don't allocate a register for literals. 520 } else if(nr->ullman >= nl->ullman || nl->addable) { 521 regalloc(&n2, nr->type, N); 522 cgen(nr, &n2); 523 nr = &n2; 524 } else { 525 tempname(&n2, nr->type); 526 cgen(nr, &n2); 527 nr = &n2; 528 } 529 if(!nl->addable) { 530 igen(nl, &n1, N); 531 nl = &n1; 532 } 533 534 n3 = *n; 535 n3.left = nl; 536 n3.right = nr; 537 n3.op = n->etype; 538 539 regalloc(&n4, nl->type, N); 540 cgen(&n3, &n4); 541 gmove(&n4, nl); 542 543 if(n1.op) 544 regfree(&n1); 545 if(n2.op == OREGISTER) 546 regfree(&n2); 547 regfree(&n4); 548 549 ret: 550 ; 551 } 552 553 int 554 samereg(Node *a, Node *b) 555 { 556 if(a == N || b == N) 557 return 0; 558 if(a->op != OREGISTER) 559 return 0; 560 if(b->op != OREGISTER) 561 return 0; 562 if(a->val.u.reg != b->val.u.reg) 563 return 0; 564 return 1; 565 } 566 567 /* 568 * generate division. 569 * generates one of: 570 * res = nl / nr 571 * res = nl % nr 572 * according to op. 573 */ 574 void 575 dodiv(int op, Node *nl, Node *nr, Node *res) 576 { 577 int a, check; 578 Node n3, n4; 579 Type *t, *t0; 580 Node ax, dx, ax1, n31, oldax, olddx; 581 Prog *p1, *p2; 582 583 // Have to be careful about handling 584 // most negative int divided by -1 correctly. 585 // The hardware will trap. 586 // Also the byte divide instruction needs AH, 587 // which we otherwise don't have to deal with. 588 // Easiest way to avoid for int8, int16: use int32. 589 // For int32 and int64, use explicit test. 590 // Could use int64 hw for int32. 591 t = nl->type; 592 t0 = t; 593 check = 0; 594 if(issigned[t->etype]) { 595 check = 1; 596 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1)) 597 check = 0; 598 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 599 check = 0; 600 } 601 if(t->width < 4) { 602 if(issigned[t->etype]) 603 t = types[TINT32]; 604 else 605 t = types[TUINT32]; 606 check = 0; 607 } 608 a = optoas(op, t); 609 610 regalloc(&n3, t0, N); 611 if(nl->ullman >= nr->ullman) { 612 savex(D_AX, &ax, &oldax, res, t0); 613 cgen(nl, &ax); 614 regalloc(&ax, t0, &ax); // mark ax live during cgen 615 cgen(nr, &n3); 616 regfree(&ax); 617 } else { 618 cgen(nr, &n3); 619 savex(D_AX, &ax, &oldax, res, t0); 620 cgen(nl, &ax); 621 } 622 if(t != t0) { 623 // Convert 624 ax1 = ax; 625 n31 = n3; 626 ax.type = t; 627 n3.type = t; 628 gmove(&ax1, &ax); 629 gmove(&n31, &n3); 630 } 631 632 p2 = P; 633 if(check) { 634 nodconst(&n4, t, -1); 635 gins(optoas(OCMP, t), &n3, &n4); 636 p1 = gbranch(optoas(ONE, t), T, +1); 637 if(op == ODIV) { 638 // a / (-1) is -a. 639 gins(optoas(OMINUS, t), N, &ax); 640 gmove(&ax, res); 641 } else { 642 // a % (-1) is 0. 643 nodconst(&n4, t, 0); 644 gmove(&n4, res); 645 } 646 p2 = gbranch(AJMP, T, 0); 647 patch(p1, pc); 648 } 649 savex(D_DX, &dx, &olddx, res, t); 650 if(!issigned[t->etype]) { 651 nodconst(&n4, t, 0); 652 gmove(&n4, &dx); 653 } else 654 gins(optoas(OEXTEND, t), N, N); 655 gins(a, &n3, N); 656 regfree(&n3); 657 if(op == ODIV) 658 gmove(&ax, res); 659 else 660 gmove(&dx, res); 661 restx(&dx, &olddx); 662 if(check) 663 patch(p2, pc); 664 restx(&ax, &oldax); 665 } 666 667 /* 668 * register dr is one of the special ones (AX, CX, DI, SI, etc.). 669 * we need to use it. if it is already allocated as a temporary 670 * (r > 1; can only happen if a routine like sgen passed a 671 * special as cgen's res and then cgen used regalloc to reuse 672 * it as its own temporary), then move it for now to another 673 * register. caller must call restx to move it back. 674 * the move is not necessary if dr == res, because res is 675 * known to be dead. 676 */ 677 void 678 savex(int dr, Node *x, Node *oldx, Node *res, Type *t) 679 { 680 int r; 681 682 r = reg[dr]; 683 684 // save current ax and dx if they are live 685 // and not the destination 686 memset(oldx, 0, sizeof *oldx); 687 nodreg(x, t, dr); 688 if(r > 1 && !samereg(x, res)) { 689 regalloc(oldx, types[TINT64], N); 690 x->type = types[TINT64]; 691 gmove(x, oldx); 692 x->type = t; 693 oldx->ostk = r; // squirrel away old r value 694 reg[dr] = 1; 695 } 696 } 697 698 void 699 restx(Node *x, Node *oldx) 700 { 701 if(oldx->op != 0) { 702 x->type = types[TINT64]; 703 reg[x->val.u.reg] = oldx->ostk; 704 gmove(oldx, x); 705 regfree(oldx); 706 } 707 } 708 709 /* 710 * generate division according to op, one of: 711 * res = nl / nr 712 * res = nl % nr 713 */ 714 void 715 cgen_div(int op, Node *nl, Node *nr, Node *res) 716 { 717 Node n1, n2, n3; 718 int w, a; 719 Magic m; 720 721 if(nr->op != OLITERAL) 722 goto longdiv; 723 w = nl->type->width*8; 724 725 // Front end handled 32-bit division. We only need to handle 64-bit. 726 // try to do division by multiply by (2^w)/d 727 // see hacker's delight chapter 10 728 switch(simtype[nl->type->etype]) { 729 default: 730 goto longdiv; 731 732 case TUINT64: 733 m.w = w; 734 m.ud = mpgetfix(nr->val.u.xval); 735 umagic(&m); 736 if(m.bad) 737 break; 738 if(op == OMOD) 739 goto longmod; 740 741 cgenr(nl, &n1, N); 742 nodconst(&n2, nl->type, m.um); 743 regalloc(&n3, nl->type, res); 744 cgen_hmul(&n1, &n2, &n3); 745 746 if(m.ua) { 747 // need to add numerator accounting for overflow 748 gins(optoas(OADD, nl->type), &n1, &n3); 749 nodconst(&n2, nl->type, 1); 750 gins(optoas(ORROTC, nl->type), &n2, &n3); 751 nodconst(&n2, nl->type, m.s-1); 752 gins(optoas(ORSH, nl->type), &n2, &n3); 753 } else { 754 nodconst(&n2, nl->type, m.s); 755 gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx 756 } 757 758 gmove(&n3, res); 759 regfree(&n1); 760 regfree(&n3); 761 return; 762 763 case TINT64: 764 m.w = w; 765 m.sd = mpgetfix(nr->val.u.xval); 766 smagic(&m); 767 if(m.bad) 768 break; 769 if(op == OMOD) 770 goto longmod; 771 772 cgenr(nl, &n1, res); 773 nodconst(&n2, nl->type, m.sm); 774 regalloc(&n3, nl->type, N); 775 cgen_hmul(&n1, &n2, &n3); 776 777 if(m.sm < 0) { 778 // need to add numerator 779 gins(optoas(OADD, nl->type), &n1, &n3); 780 } 781 782 nodconst(&n2, nl->type, m.s); 783 gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3 784 785 nodconst(&n2, nl->type, w-1); 786 gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg 787 gins(optoas(OSUB, nl->type), &n1, &n3); // added 788 789 if(m.sd < 0) { 790 // this could probably be removed 791 // by factoring it into the multiplier 792 gins(optoas(OMINUS, nl->type), N, &n3); 793 } 794 795 gmove(&n3, res); 796 regfree(&n1); 797 regfree(&n3); 798 return; 799 } 800 goto longdiv; 801 802 longdiv: 803 // division and mod using (slow) hardware instruction 804 dodiv(op, nl, nr, res); 805 return; 806 807 longmod: 808 // mod using formula A%B = A-(A/B*B) but 809 // we know that there is a fast algorithm for A/B 810 regalloc(&n1, nl->type, res); 811 cgen(nl, &n1); 812 regalloc(&n2, nl->type, N); 813 cgen_div(ODIV, &n1, nr, &n2); 814 a = optoas(OMUL, nl->type); 815 if(w == 8) { 816 // use 2-operand 16-bit multiply 817 // because there is no 2-operand 8-bit multiply 818 a = AIMULW; 819 } 820 if(!smallintconst(nr)) { 821 regalloc(&n3, nl->type, N); 822 cgen(nr, &n3); 823 gins(a, &n3, &n2); 824 regfree(&n3); 825 } else 826 gins(a, nr, &n2); 827 gins(optoas(OSUB, nl->type), &n2, &n1); 828 gmove(&n1, res); 829 regfree(&n1); 830 regfree(&n2); 831 } 832 833 /* 834 * generate high multiply: 835 * res = (nl*nr) >> width 836 */ 837 void 838 cgen_hmul(Node *nl, Node *nr, Node *res) 839 { 840 Type *t; 841 int a; 842 Node n1, n2, ax, dx, *tmp; 843 844 t = nl->type; 845 a = optoas(OHMUL, t); 846 if(nl->ullman < nr->ullman) { 847 tmp = nl; 848 nl = nr; 849 nr = tmp; 850 } 851 cgenr(nl, &n1, res); 852 cgenr(nr, &n2, N); 853 nodreg(&ax, t, D_AX); 854 gmove(&n1, &ax); 855 gins(a, &n2, N); 856 regfree(&n2); 857 regfree(&n1); 858 859 if(t->width == 1) { 860 // byte multiply behaves differently. 861 nodreg(&ax, t, D_AH); 862 nodreg(&dx, t, D_DL); 863 gmove(&ax, &dx); 864 } 865 nodreg(&dx, t, D_DX); 866 gmove(&dx, res); 867 } 868 869 /* 870 * generate shift according to op, one of: 871 * res = nl << nr 872 * res = nl >> nr 873 */ 874 void 875 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 876 { 877 Node n1, n2, n3, n4, n5, cx, oldcx; 878 int a, rcx; 879 Prog *p1; 880 uvlong sc; 881 Type *tcount; 882 883 a = optoas(op, nl->type); 884 885 if(nr->op == OLITERAL) { 886 regalloc(&n1, nl->type, res); 887 cgen(nl, &n1); 888 sc = mpgetfix(nr->val.u.xval); 889 if(sc >= nl->type->width*8) { 890 // large shift gets 2 shifts by width-1 891 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 892 gins(a, &n3, &n1); 893 gins(a, &n3, &n1); 894 } else 895 gins(a, nr, &n1); 896 gmove(&n1, res); 897 regfree(&n1); 898 goto ret; 899 } 900 901 if(nl->ullman >= UINF) { 902 tempname(&n4, nl->type); 903 cgen(nl, &n4); 904 nl = &n4; 905 } 906 if(nr->ullman >= UINF) { 907 tempname(&n5, nr->type); 908 cgen(nr, &n5); 909 nr = &n5; 910 } 911 912 rcx = reg[D_CX]; 913 nodreg(&n1, types[TUINT32], D_CX); 914 915 // Allow either uint32 or uint64 as shift type, 916 // to avoid unnecessary conversion from uint32 to uint64 917 // just to do the comparison. 918 tcount = types[simtype[nr->type->etype]]; 919 if(tcount->etype < TUINT32) 920 tcount = types[TUINT32]; 921 922 regalloc(&n1, nr->type, &n1); // to hold the shift type in CX 923 regalloc(&n3, tcount, &n1); // to clear high bits of CX 924 925 nodreg(&cx, types[TUINT64], D_CX); 926 memset(&oldcx, 0, sizeof oldcx); 927 if(rcx > 0 && !samereg(&cx, res)) { 928 regalloc(&oldcx, types[TUINT64], N); 929 gmove(&cx, &oldcx); 930 } 931 cx.type = tcount; 932 933 if(samereg(&cx, res)) 934 regalloc(&n2, nl->type, N); 935 else 936 regalloc(&n2, nl->type, res); 937 if(nl->ullman >= nr->ullman) { 938 cgen(nl, &n2); 939 cgen(nr, &n1); 940 gmove(&n1, &n3); 941 } else { 942 cgen(nr, &n1); 943 gmove(&n1, &n3); 944 cgen(nl, &n2); 945 } 946 regfree(&n3); 947 948 // test and fix up large shifts 949 if(!bounded) { 950 nodconst(&n3, tcount, nl->type->width*8); 951 gins(optoas(OCMP, tcount), &n1, &n3); 952 p1 = gbranch(optoas(OLT, tcount), T, +1); 953 if(op == ORSH && issigned[nl->type->etype]) { 954 nodconst(&n3, types[TUINT32], nl->type->width*8-1); 955 gins(a, &n3, &n2); 956 } else { 957 nodconst(&n3, nl->type, 0); 958 gmove(&n3, &n2); 959 } 960 patch(p1, pc); 961 } 962 963 gins(a, &n1, &n2); 964 965 if(oldcx.op != 0) { 966 cx.type = types[TUINT64]; 967 gmove(&oldcx, &cx); 968 regfree(&oldcx); 969 } 970 971 gmove(&n2, res); 972 973 regfree(&n1); 974 regfree(&n2); 975 976 ret: 977 ; 978 } 979 980 /* 981 * generate byte multiply: 982 * res = nl * nr 983 * there is no 2-operand byte multiply instruction so 984 * we do a full-width multiplication and truncate afterwards. 985 */ 986 void 987 cgen_bmul(int op, Node *nl, Node *nr, Node *res) 988 { 989 Node n1, n2, n1b, n2b, *tmp; 990 Type *t; 991 int a; 992 993 // largest ullman on left. 994 if(nl->ullman < nr->ullman) { 995 tmp = nl; 996 nl = nr; 997 nr = tmp; 998 } 999 1000 // generate operands in "8-bit" registers. 1001 regalloc(&n1b, nl->type, res); 1002 cgen(nl, &n1b); 1003 regalloc(&n2b, nr->type, N); 1004 cgen(nr, &n2b); 1005 1006 // perform full-width multiplication. 1007 t = types[TUINT64]; 1008 if(issigned[nl->type->etype]) 1009 t = types[TINT64]; 1010 nodreg(&n1, t, n1b.val.u.reg); 1011 nodreg(&n2, t, n2b.val.u.reg); 1012 a = optoas(op, t); 1013 gins(a, &n2, &n1); 1014 1015 // truncate. 1016 gmove(&n1, res); 1017 regfree(&n1b); 1018 regfree(&n2b); 1019 } 1020 1021 void 1022 clearfat(Node *nl) 1023 { 1024 int64 w, c, q; 1025 Node n1, oldn1, ax, oldax; 1026 1027 /* clear a fat object */ 1028 if(debug['g']) 1029 dump("\nclearfat", nl); 1030 1031 1032 w = nl->type->width; 1033 // Avoid taking the address for simple enough types. 1034 if(componentgen(N, nl)) 1035 return; 1036 1037 c = w % 8; // bytes 1038 q = w / 8; // quads 1039 1040 savex(D_DI, &n1, &oldn1, N, types[tptr]); 1041 agen(nl, &n1); 1042 1043 savex(D_AX, &ax, &oldax, N, types[tptr]); 1044 gconreg(AMOVQ, 0, D_AX); 1045 1046 if(q >= 4) { 1047 gconreg(AMOVQ, q, D_CX); 1048 gins(AREP, N, N); // repeat 1049 gins(ASTOSQ, N, N); // STOQ AL,*(DI)+ 1050 } else 1051 while(q > 0) { 1052 gins(ASTOSQ, N, N); // STOQ AL,*(DI)+ 1053 q--; 1054 } 1055 1056 if(c >= 4) { 1057 gconreg(AMOVQ, c, D_CX); 1058 gins(AREP, N, N); // repeat 1059 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 1060 } else 1061 while(c > 0) { 1062 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 1063 c--; 1064 } 1065 1066 restx(&n1, &oldn1); 1067 restx(&ax, &oldax); 1068 } 1069 1070 // Called after regopt and peep have run. 1071 // Expand CHECKNIL pseudo-op into actual nil pointer check. 1072 void 1073 expandchecks(Prog *firstp) 1074 { 1075 Prog *p, *p1, *p2; 1076 1077 for(p = firstp; p != P; p = p->link) { 1078 if(p->as != ACHECKNIL) 1079 continue; 1080 if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers 1081 warnl(p->lineno, "nil check %D", &p->from); 1082 // check is 1083 // CMP arg, $0 1084 // JNE 2(PC) (likely) 1085 // MOV AX, 0 1086 p1 = mal(sizeof *p1); 1087 p2 = mal(sizeof *p2); 1088 clearp(p1); 1089 clearp(p2); 1090 p1->link = p2; 1091 p2->link = p->link; 1092 p->link = p1; 1093 p1->lineno = p->lineno; 1094 p2->lineno = p->lineno; 1095 p1->loc = 9999; 1096 p2->loc = 9999; 1097 p->as = ACMPQ; 1098 p->to.type = D_CONST; 1099 p->to.offset = 0; 1100 p1->as = AJNE; 1101 p1->from.type = D_CONST; 1102 p1->from.offset = 1; // likely 1103 p1->to.type = D_BRANCH; 1104 p1->to.u.branch = p2->link; 1105 // crash by write to memory address 0. 1106 // if possible, since we know arg is 0, use 0(arg), 1107 // which will be shorter to encode than plain 0. 1108 p2->as = AMOVL; 1109 p2->from.type = D_AX; 1110 if(regtyp(&p->from)) 1111 p2->to.type = p->from.type + D_INDIR; 1112 else 1113 p2->to.type = D_INDIR+D_NONE; 1114 p2->to.offset = 0; 1115 } 1116 }