// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Code generation for 8g (the 386 Go compiler): stack-frame setup,
// call generation, and helpers for assignment ops, division, shifts,
// multiplies, and floating point (387 and SSE2 paths).

#undef EXTERN
#define EXTERN
#include <u.h>
#include <libc.h>
#include "gg.h"
#include "opt.h"

static Prog* appendp(Prog*, int, int, int32, int, int32);

// defframe fills in the argument size and final frame size on the
// function's TEXT instruction, then inserts code after it that zeroes
// the pointer-containing part of the frame (guided by the liveness
// bitmap bv) so the garbage collector never sees uninitialized words.
void
defframe(Prog *ptxt, Bvec *bv)
{
	uint32 frame;
	Prog *p;
	int i, j;

	// fill in argument size
	ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);

	// fill in final stack size
	if(stksize > maxstksize)
		maxstksize = stksize;
	frame = rnd(maxstksize+maxarg, widthptr);
	ptxt->to.offset = frame;
	maxstksize = 0;

	// insert code to clear pointered part of the frame,
	// so that garbage collector only sees initialized values
	// when it looks for pointers.
	p = ptxt;
	if(stkzerosize >= 8*widthptr) {
		// Large region: clear it wholesale with REP STOSL.
		p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0);
		p = appendp(p, AMOVL, D_CONST, stkzerosize/widthptr, D_CX, 0);
		p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0);
		p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
		appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
	} else {
		// Small region: emit one MOVL $0 per word whose pair of
		// bitmap bits indicates it may hold a pointer.
		for(i=0, j=(stkptrsize-stkzerosize)/widthptr*2; i<stkzerosize; i+=widthptr, j+=2)
			if(bvget(bv, j) || bvget(bv, j+1))
				p = appendp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i);
	}
}

// appendp allocates a fresh instruction with the given opcode and
// from/to operands and links it into the list immediately after p,
// inheriting p's line number. Returns the new instruction.
static Prog*
appendp(Prog *p, int as, int ftype, int32 foffset, int ttype, int32 toffset)
{
	Prog *q;

	q = mal(sizeof(*q));
	clearp(q);
	q->as = as;
	q->lineno = p->lineno;
	q->from.type = ftype;
	q->from.offset = foffset;
	q->to.type = ttype;
	q->to.offset = toffset;
	q->link = p->link;
	p->link = q;
	return q;
}

// Sweep the prog list to mark any used nodes.
void
markautoused(Prog* p)
{
	for (; p; p = p->link) {
		// ATYPE pseudo-instructions only declare a variable's type;
		// they do not constitute a use.
		if (p->as == ATYPE)
			continue;

		if (p->from.type == D_AUTO && p->from.node)
			p->from.node->used = 1;

		if (p->to.type == D_AUTO && p->to.node)
			p->to.node->used = 1;
	}
}

// Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
void
fixautoused(Prog* p)
{
	Prog **lp;

	for (lp=&p; (p=*lp) != P; ) {
		// Remove ATYPE declarations for autos that were never used
		// (allocauto dropped them from the frame).
		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
			*lp = p->link;
			continue;
		}

		// Shift auto references by the delta allocauto recorded
		// when it repacked the stack frame.
		if (p->from.type == D_AUTO && p->from.node)
			p->from.offset += p->from.node->stkdelta;

		if (p->to.type == D_AUTO && p->to.node)
			p->to.offset += p->to.node->stkdelta;

		lp = &p->link;
	}
}

// clearfat generates code to zero the multi-word object nl,
// using DI as the destination pointer and AX as the zero source.
void
clearfat(Node *nl)
{
	uint32 w, c, q;
	Node n1;

	/* clear a fat object */
	if(debug['g'])
		dump("\nclearfat", nl);

	w = nl->type->width;
	// Avoid taking the address for simple enough types.
	if(componentgen(N, nl))
		return;

	c = w % 4;	// bytes
	q = w / 4;	// quads

	nodreg(&n1, types[tptr], D_DI);
	agen(nl, &n1);
	gconreg(AMOVL, 0, D_AX);

	if(q >= 4) {
		gconreg(AMOVL, q, D_CX);
		gins(AREP, N, N);	// repeat
		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
	} else
	while(q > 0) {
		gins(ASTOSL, N, N);	// STOL AL,*(DI)+
		q--;
	}

	// NOTE(review): c = w % 4 is always < 4, so this REP STOSB branch
	// looks unreachable; kept as-is from the original.
	if(c >= 4) {
		gconreg(AMOVL, c, D_CX);
		gins(AREP, N, N);	// repeat
		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
	} else
	while(c > 0) {
		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
		c--;
	}
}

/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 */
void
ginscall(Node *f, int proc)
{
	int32 arg;
	Prog *p;
	Node reg, r1, con;

	if(f->type != T)
		setmaxarg(f->type);

	arg = -1;
	// Most functions have a fixed-size argument block, so traceback uses that during unwind.
	// Not all, though: there are some variadic functions in package runtime,
	// and for those we emit call-specific metadata recorded by caller.
	// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
	// so we do this for all indirect calls as well.
	if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
		arg = f->type->argwid;
		if(proc == 1 || proc == 2)
			arg += 2*widthptr;	// go/defer push fn and argsize words too
	}

	if(arg != -1)
		gargsize(arg);

	switch(proc) {
	default:
		fatal("ginscall: bad proc %d", proc);
		break;

	case 0:	// normal call
	case -1:	// normal call but no return
		if(f->op == ONAME && f->class == PFUNC) {
			if(f == deferreturn) {
				// Deferred calls will appear to be returning to
				// the CALL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction byte before the return PC.
				// To avoid that being an unrelated instruction,
				// insert an x86 NOP that we will have the right line number.
				// x86 NOP 0x90 is really XCHG AX, AX; use that description
				// because the NOP pseudo-instruction will be removed by
				// the linker.
				nodreg(&reg, types[TINT], D_AX);
				gins(AXCHGL, &reg, &reg);
			}
			p = gins(ACALL, N, f);
			afunclit(&p->to, f);
			if(proc == -1 || noreturn(p))
				gins(AUNDEF, N, N);
			break;
		}
		// Indirect call through a func value: load the func value
		// into DX, load the code pointer it points at into BX, call BX.
		nodreg(&reg, types[tptr], D_DX);
		nodreg(&r1, types[tptr], D_BX);
		gmove(f, &reg);
		reg.op = OINDREG;
		gmove(&reg, &r1);
		reg.op = OREGISTER;
		gins(ACALL, &reg, &r1);
		break;

	case 3:	// normal call of c function pointer
		gins(ACALL, N, f);
		break;

	case 1:	// call in new proc (go)
	case 2:	// deferred call (defer)
		// Push the function value and its argument size, then call
		// runtime.newproc or runtime.deferproc; pop the two words after.
		nodreg(&reg, types[TINT32], D_CX);
		gins(APUSHL, f, N);
		nodconst(&con, types[TINT32], argsize(f->type));
		gins(APUSHL, &con, N);
		if(proc == 1)
			ginscall(newproc, 0);
		else
			ginscall(deferproc, 0);
		gins(APOPL, N, &reg);
		gins(APOPL, N, &reg);
		if(proc == 2) {
			// deferproc returns nonzero in AX when the deferred
			// frame must unwind; jump straight to the return code.
			nodreg(&reg, types[TINT64], D_AX);
			gins(ATESTL, &reg, &reg);
			patch(gbranch(AJNE, T, -1), retpc);
		}
		break;
	}

	if(arg != -1)
		gargsize(-1);
}

/*
 * n is call to interface method.
 * generate res = n.
 */
void
cgen_callinter(Node *n, Node *res, int proc)
{
	Node *i, *f;
	Node tmpi, nodi, nodo, nodr, nodsp;

	i = n->left;
	if(i->op != ODOTINTER)
		fatal("cgen_callinter: not ODOTINTER %O", i->op);

	f = i->right;		// field
	if(f->op != ONAME)
		fatal("cgen_callinter: not ONAME %O", f->op);

	i = i->left;		// interface

	if(!i->addable) {
		tempname(&tmpi, i->type);
		cgen(i, &tmpi);
		i = &tmpi;
	}

	genlist(n->list);		// assign the args

	// i is now addable, prepare an indirected
	// register to hold its address.
	igen(i, &nodi, res);		// REG = &inter

	// Store the receiver data word as the hidden first argument at 0(SP).
	nodindreg(&nodsp, types[tptr], D_SP);
	nodi.type = types[tptr];
	nodi.xoffset += widthptr;
	cgen(&nodi, &nodsp);	// 0(SP) = 4(REG) -- i.data

	regalloc(&nodo, types[tptr], res);
	nodi.type = types[tptr];
	nodi.xoffset -= widthptr;
	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
	regfree(&nodi);

	regalloc(&nodr, types[tptr], &nodo);
	if(n->left->xoffset == BADWIDTH)
		fatal("cgen_callinter: badwidth");
	cgen_checknil(&nodo);	// in case offset is large
	nodo.op = OINDREG;
	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;

	if(proc == 0) {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
		proc = 3;
	} else {
		// go/defer. generate go func value.
		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
	}

	nodr.type = n->left->type;
	ginscall(&nodr, proc);

	regfree(&nodr);
	regfree(&nodo);
}

/*
 * generate function call;
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 */
void
cgen_call(Node *n, int proc)
{
	Type *t;
	Node nod, afun;

	if(n == N)
		return;

	if(n->left->ullman >= UINF) {
		// if name involves a fn call
		// precompute the address of the fn
		tempname(&afun, types[tptr]);
		cgen(n->left, &afun);
	}

	genlist(n->list);		// assign the args
	t = n->left->type;

	// call tempname pointer
	if(n->left->ullman >= UINF) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, &afun);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call pointer
	if(n->left->op != ONAME || n->left->class != PFUNC) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, n->left);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call direct
	n->left->method = 1;
	ginscall(n->left, proc);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = return value from call.
 */
void
cgen_callret(Node *n, Node *res)
{
	Node nod;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(t->etype == TPTR32 || t->etype == TPTR64)
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_callret: nil");

	// The result lives at its out-argument offset from SP.
	memset(&nod, 0, sizeof(nod));
	nod.op = OINDREG;
	nod.val.u.reg = D_SP;
	nod.addable = 1;

	nod.xoffset = fp->width;
	nod.type = fp->type;
	cgen_as(res, &nod);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = &return value from call.
 */
void
cgen_aret(Node *n, Node *res)
{
	Node nod1, nod2;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(isptr[t->etype])
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_aret: nil");

	memset(&nod1, 0, sizeof(nod1));
	nod1.op = OINDREG;
	nod1.val.u.reg = D_SP;
	nod1.addable = 1;

	nod1.xoffset = fp->width;
	nod1.type = fp->type;

	// Materialize the address with LEA; go through a scratch
	// register if res is not itself a register.
	if(res->op != OREGISTER) {
		regalloc(&nod2, types[tptr], res);
		gins(ALEAL, &nod1, &nod2);
		gins(AMOVL, &nod2, res);
		regfree(&nod2);
	} else
		gins(ALEAL, &nod1, res);
}

/*
 * generate return.
 * n->left is assignments to return values.
 */
void
cgen_ret(Node *n)
{
	Prog *p;

	genlist(n->list);		// copy out args
	if(retpc) {
		// Inside a deferproc body: jump to the shared return point.
		gjmp(retpc);
		return;
	}
	p = gins(ARET, N, N);
	if(n->op == ORETJMP) {
		// Tail call: RET becomes a jump to the named function.
		p->to.type = D_EXTERN;
		p->to.sym = n->left->sym;
	}
}

/*
 * generate += *= etc.
 */
void
cgen_asop(Node *n)
{
	Node n1, n2, n3, n4;
	Node *nl, *nr;
	Prog *p1;
	Addr addr;
	int a;

	nl = n->left;
	nr = n->right;

	// Both sides contain function calls: evaluate the right side
	// into a temporary first so the calls cannot interfere.
	if(nr->ullman >= UINF && nl->ullman >= UINF) {
		tempname(&n1, nr->type);
		cgen(nr, &n1);
		n2 = *n;
		n2.right = &n1;
		cgen_asop(&n2);
		goto ret;
	}

	// Fast paths below only apply to 32-bit-or-smaller integers.
	if(!isint[nl->type->etype])
		goto hard;
	if(!isint[nr->type->etype])
		goto hard;
	if(is64(nl->type) || is64(nr->type))
		goto hard;

	switch(n->etype) {
	case OADD:
		// x += 1 becomes INC.
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(OINC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;

	case OSUB:
		// x -= 1 becomes DEC.
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(ODEC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;
	}

	switch(n->etype) {
	case OADD:
	case OSUB:
	case OXOR:
	case OAND:
	case OOR:
		// Read-modify-write directly on the left operand when
		// it (or an addressable form of it) can take the op.
		a = optoas(n->etype, nl->type);
		if(nl->addable) {
			if(smallintconst(nr)) {
				gins(a, nr, nl);
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			gins(a, &n2, nl);
			regfree(&n2);
			goto ret;
		}
		if(nr->ullman < UINF)
		if(sudoaddable(a, nl, &addr)) {
			if(smallintconst(nr)) {
				p1 = gins(a, nr, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			p1 = gins(a, &n2, N);
			p1->to = addr;
			regfree(&n2);
			sudoclean();
			goto ret;
		}
	}

hard:
	// General case: compute nl op nr into a temporary and move it back.
	n2.op = 0;
	n1.op = 0;
	if(nr->ullman >= nl->ullman || nl->addable) {
		mgen(nr, &n2, N);
		nr = &n2;
	} else {
		tempname(&n2, nr->type);
		cgen(nr, &n2);
		nr = &n2;
	}
	if(!nl->addable) {
		igen(nl, &n1, N);
		nl = &n1;
	}

	n3 = *n;
	n3.left = nl;
	n3.right = nr;
	n3.op = n->etype;

	mgen(&n3, &n4, N);
	gmove(&n4, nl);

	if(n1.op)
		regfree(&n1);
	mfree(&n2);
	mfree(&n4);

ret:
	;
}

// samereg reports whether a and b are the same machine register.
int
samereg(Node *a, Node *b)
{
	if(a->op != OREGISTER)
		return 0;
	if(b->op != OREGISTER)
		return 0;
	if(a->val.u.reg != b->val.u.reg)
		return 0;
	return 1;
}

/*
 * generate division.
 * caller must set:
 *	ax = allocated AX register
 *	dx = allocated DX register
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
void
dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
{
	int check;
	Node n1, t1, t2, t3, t4, n4, nz;
	Type *t, *t0;
	Prog *p1, *p2;

	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t = nl->type;
	t0 = t;
	check = 0;
	if(issigned[t->etype]) {
		check = 1;
		// Skip the runtime check when a constant operand proves
		// MININT / -1 cannot occur.
		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
			check = 0;
		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
			check = 0;
	}
	if(t->width < 4) {
		// Widen int8/int16 to 32 bits; the widened values can
		// never hit the MININT/-1 trap.
		if(issigned[t->etype])
			t = types[TINT32];
		else
			t = types[TUINT32];
		check = 0;
	}

	tempname(&t1, t);
	tempname(&t2, t);
	if(t0 != t) {
		tempname(&t3, t0);
		tempname(&t4, t0);
		cgen(nl, &t3);
		cgen(nr, &t4);
		// Convert.
		gmove(&t3, &t1);
		gmove(&t4, &t2);
	} else {
		cgen(nl, &t1);
		cgen(nr, &t2);
	}

	if(!samereg(ax, res) && !samereg(dx, res))
		regalloc(&n1, t, res);
	else
		regalloc(&n1, t, N);
	gmove(&t2, &n1);
	gmove(&t1, ax);
	p2 = P;
	if(check) {
		nodconst(&n4, t, -1);
		gins(optoas(OCMP, t), &n1, &n4);
		p1 = gbranch(optoas(ONE, t), T, +1);
		if(op == ODIV) {
			// a / (-1) is -a.
			gins(optoas(OMINUS, t), N, ax);
			gmove(ax, res);
		} else {
			// a % (-1) is 0.
			nodconst(&n4, t, 0);
			gmove(&n4, res);
		}
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
	}
	// Set up DX: zero for unsigned, sign-extension of AX for signed.
	if(!issigned[t->etype]) {
		nodconst(&nz, t, 0);
		gmove(&nz, dx);
	} else
		gins(optoas(OEXTEND, t), N, N);
	gins(optoas(op, t), &n1, N);
	regfree(&n1);

	// DIV leaves quotient in AX, remainder in DX.
	if(op == ODIV)
		gmove(ax, res);
	else
		gmove(dx, res);
	if(check)
		patch(p2, pc);
}

// savex allocates register dr into x for use by division; if the
// register is currently live (and is not the destination), its value
// is first saved into the temporary oldx for restx to restore.
static void
savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
{
	int r;

	r = reg[dr];
	nodreg(x, types[TINT32], dr);

	// save current ax and dx if they are live
	// and not the destination
	memset(oldx, 0, sizeof *oldx);
	if(r > 0 && !samereg(x, res)) {
		tempname(oldx, types[TINT32]);
		gmove(x, oldx);
	}

	regalloc(x, t, x);
}

// restx releases the register x and restores its previous value
// from oldx if savex spilled one.
static void
restx(Node *x, Node *oldx)
{
	regfree(x);

	if(oldx->op != 0) {
		x->type = types[TINT32];
		gmove(oldx, x);
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
void
cgen_div(int op, Node *nl, Node *nr, Node *res)
{
	Node ax, dx, oldax, olddx;
	Type *t;

	if(is64(nl->type))
		fatal("cgen_div %T", nl->type);

	if(issigned[nl->type->etype])
		t = types[TINT32];
	else
		t = types[TUINT32];
	// x86 division requires AX (dividend/quotient) and DX (remainder).
	savex(D_AX, &ax, &oldax, res, t);
	savex(D_DX, &dx, &olddx, res, t);
	dodiv(op, nl, nr, res, &ax, &dx);
	restx(&dx, &olddx);
	restx(&ax, &oldax);
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
void
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, nt, cx, oldcx, hi, lo;
	int a, w;
	Prog *p1, *p2;
	uvlong sc;

	if(nl->type->width > 4)
		fatal("cgen_shift %T", nl->type);

	w = nl->type->width * 8;

	a = optoas(op, nl->type);

	if(nr->op == OLITERAL) {
		// Constant shift count: no CX or bounds code needed.
		tempname(&n2, nl->type);
		cgen(nl, &n2);
		regalloc(&n1, nl->type, res);
		gmove(&n2, &n1);
		sc = mpgetfix(nr->val.u.xval);
		if(sc >= nl->type->width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(w-1), &n1);
			gins(a, ncon(w-1), &n1);
		} else
			gins(a, nr, &n1);
		gmove(&n1, res);
		regfree(&n1);
		return;
	}

	// Variable shift count must be in CX; spill CX if it is live.
	memset(&oldcx, 0, sizeof oldcx);
	nodreg(&cx, types[TUINT32], D_CX);
	if(reg[D_CX] > 1 && !samereg(&cx, res)) {
		tempname(&oldcx, types[TUINT32]);
		gmove(&cx, &oldcx);
	}

	if(nr->type->width > 4) {
		// 64-bit count: keep it in a temporary for split64 below.
		tempname(&nt, nr->type);
		n1 = nt;
	} else {
		nodreg(&n1, types[TUINT32], D_CX);
		regalloc(&n1, nr->type, &n1); // to hold the shift type in CX
	}

	if(samereg(&cx, res))
		regalloc(&n2, nl->type, N);
	else
		regalloc(&n2, nl->type, res);
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &n2);
		cgen(nr, &n1);
	} else {
		cgen(nr, &n1);
		cgen(nl, &n2);
	}

	// test and fix up large shifts
	if(bounded) {
		if(nr->type->width > 4) {
			// delayed reg alloc
			nodreg(&n1, types[TUINT32], D_CX);
			regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
			split64(&nt, &lo, &hi);
			gmove(&lo, &n1);
			splitclean();
		}
	} else {
		if(nr->type->width > 4) {
			// delayed reg alloc
			nodreg(&n1, types[TUINT32], D_CX);
			regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
			split64(&nt, &lo, &hi);
			gmove(&lo, &n1);
			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
			splitclean();
			patch(p2, pc);
		} else {
			gins(optoas(OCMP, nr->type), &n1, ncon(w));
			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
		}
		// Shift count >= width: signed right shift saturates to the
		// sign bit; everything else becomes 0.
		if(op == ORSH && issigned[nl->type->etype]) {
			gins(a, ncon(w-1), &n2);
		} else {
			gmove(ncon(0), &n2);
		}
		patch(p1, pc);
	}
	gins(a, &n1, &n2);

	if(oldcx.op != 0)
		gmove(&oldcx, &cx);

	gmove(&n2, res);

	regfree(&n1);
	regfree(&n2);
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
void
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, nt, *tmp;
	Type *t;
	int a;

	// copy from byte to full registers
	t = types[TUINT32];
	if(issigned[nl->type->etype])
		t = types[TINT32];

	// largest ullman on left.
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}

	tempname(&nt, nl->type);
	cgen(nl, &nt);
	regalloc(&n1, t, res);
	cgen(nr, &n1);
	regalloc(&n2, t, N);
	gmove(&nt, &n2);
	a = optoas(op, t);
	gins(a, &n2, &n1);
	regfree(&n2);
	gmove(&n1, res);
	regfree(&n1);
}

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
void
cgen_hmul(Node *nl, Node *nr, Node *res)
{
	Type *t;
	int a;
	Node n1, n2, ax, dx;

	t = nl->type;
	a = optoas(OHMUL, t);
	// gen nl in n1.
	tempname(&n1, t);
	cgen(nl, &n1);
	// gen nr in n2.
	regalloc(&n2, t, res);
	cgen(nr, &n2);

	// multiply.
	nodreg(&ax, t, D_AX);
	gmove(&n2, &ax);
	gins(a, &n1, N);
	regfree(&n2);

	if(t->width == 1) {
		// byte multiply behaves differently.
		// High byte lands in AH; move it to DL so the common
		// DX move below picks it up.
		nodreg(&ax, t, D_AH);
		nodreg(&dx, t, D_DL);
		gmove(&ax, &dx);
	}
	nodreg(&dx, t, D_DX);
	gmove(&dx, res);
}

static void cgen_float387(Node *n, Node *res);
static void cgen_floatsse(Node *n, Node *res);

/*
 * generate floating-point operation.
 */
void
cgen_float(Node *n, Node *res)
{
	Node *nl;
	Node n1, n2;
	Prog *p1, *p2, *p3;

	nl = n->left;
	switch(n->op) {
	case OEQ:
	case ONE:
	case OLT:
	case OLE:
	case OGE:
		// Comparison producing a bool: branch through bgen
		// and materialize 1 / 0 into res.
		p1 = gbranch(AJMP, T, 0);
		p2 = pc;
		gmove(nodbool(1), res);
		p3 = gbranch(AJMP, T, 0);
		patch(p1, pc);
		bgen(n, 1, 0, p2);
		gmove(nodbool(0), res);
		patch(p3, pc);
		return;

	case OPLUS:
		cgen(nl, res);
		return;

	case OCONV:
		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
			cgen(nl, res);
			return;
		}

		// Convert through a memory temporary of the target type.
		tempname(&n2, n->type);
		mgen(nl, &n1, res);
		gmove(&n1, &n2);
		gmove(&n2, res);
		mfree(&n1);
		return;
	}

	if(use_sse)
		cgen_floatsse(n, res);
	else
		cgen_float387(n, res);
}

// floating-point. 387 (not SSE2)
static void
cgen_float387(Node *n, Node *res)
{
	Node f0, f1;
	Node *nl, *nr;

	nl = n->left;
	nr = n->right;
	nodreg(&f0, nl->type, D_F0);
	nodreg(&f1, n->type, D_F0+1);
	if(nr != N)
		goto flt2;

	// unary
	cgen(nl, &f0);
	if(n->op != OCONV && n->op != OPLUS)
		gins(foptoas(n->op, n->type, 0), N, N);
	gmove(&f0, res);
	return;

flt2:	// binary
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &f0);
		if(nr->addable)
			gins(foptoas(n->op, n->type, 0), nr, &f0);
		else {
			cgen(nr, &f0);
			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
		}
	} else {
		cgen(nr, &f0);
		if(nl->addable)
			gins(foptoas(n->op, n->type, Frev), nl, &f0);
		else {
			cgen(nl, &f0);
			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
		}
	}
	gmove(&f0, res);
	return;

}

// SSE2 floating-point operation generation.
static void
cgen_floatsse(Node *n, Node *res)
{
	Node *nl, *nr, *r;
	Node n1, n2, nt;
	int a;

	nl = n->left;
	nr = n->right;
	switch(n->op) {
	default:
		dump("cgen_floatsse", n);
		fatal("cgen_floatsse %O", n->op);
		return;

	case OMINUS:
	case OCOM:
		// Negation via multiply by -1.
		nr = nodintconst(-1);
		convlit(&nr, n->type);
		a = foptoas(OMUL, nl->type, 0);
		goto sbop;

	// symmetric binary
	case OADD:
	case OMUL:
		a = foptoas(n->op, nl->type, 0);
		goto sbop;

	// asymmetric binary
	case OSUB:
	case OMOD:
	case ODIV:
		a = foptoas(n->op, nl->type, 0);
		goto abop;
	}

sbop:	// symmetric binary
	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
		r = nl;
		nl = nr;
		nr = r;
	}

abop:	// asymmetric binary
	if(nl->ullman >= nr->ullman) {
		tempname(&nt, nl->type);
		cgen(nl, &nt);
		mgen(nr, &n2, N);
		regalloc(&n1, nl->type, res);
		gmove(&nt, &n1);
		gins(a, &n2, &n1);
		gmove(&n1, res);
		regfree(&n1);
		mfree(&n2);
	} else {
		regalloc(&n2, nr->type, res);
		cgen(nr, &n2);
		regalloc(&n1, nl->type, N);
		cgen(nl, &n1);
		gins(a, &n2, &n1);
		regfree(&n2);
		gmove(&n1, res);
		regfree(&n1);
	}
	return;
}

// bgen_float generates a conditional branch to 'to' for a
// floating-point comparison n, taken when the comparison result
// equals 'true'. NaN handling forces the P-flag checks below.
void
bgen_float(Node *n, int true, int likely, Prog *to)
{
	int et, a;
	Node *nl, *nr, *r;
	Node n1, n2, n3, tmp, t1, t2, ax;
	Prog *p1, *p2;

	nl = n->left;
	nr = n->right;
	a = n->op;
	if(!true) {
		// brcom is not valid on floats when NaN is involved.
		p1 = gbranch(AJMP, T, 0);
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
		// No need to avoid re-genning ninit.
		bgen_float(n, 1, -likely, p2);
		patch(gbranch(AJMP, T, 0), to);
		patch(p2, pc);
		return;
	}

	if(use_sse)
		goto sse;
	else
		goto x87;

x87:
	a = brrev(a);	// because the args are stacked
	if(a == OGE || a == OGT) {
		// only < and <= work right with NaN; reverse if needed
		r = nr;
		nr = nl;
		nl = r;
		a = brrev(a);
	}

	nodreg(&tmp, nr->type, D_F0);
	nodreg(&n2, nr->type, D_F0 + 1);
	nodreg(&ax, types[TUINT16], D_AX);
	et = simsimtype(nr->type);
	if(et == TFLOAT64) {
		if(nl->ullman > nr->ullman) {
			cgen(nl, &tmp);
			cgen(nr, &tmp);
			gins(AFXCHD, &tmp, &n2);
		} else {
			cgen(nr, &tmp);
			cgen(nl, &tmp);
		}
		gins(AFUCOMIP, &tmp, &n2);
		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
	} else {
		// TODO(rsc): The moves back and forth to memory
		// here are for truncating the value to 32 bits.
		// This handles 32-bit comparison but presumably
		// all the other ops have the same problem.
		// We need to figure out what the right general
		// solution is, besides telling people to use float64.
		tempname(&t1, types[TFLOAT32]);
		tempname(&t2, types[TFLOAT32]);
		cgen(nr, &t1);
		cgen(nl, &t2);
		gmove(&t2, &tmp);
		gins(AFCOMFP, &t1, &tmp);
		gins(AFSTSW, N, &ax);
		gins(ASAHF, N, N);
	}

	goto ret;

sse:
	if(!nl->addable) {
		tempname(&n1, nl->type);
		cgen(nl, &n1);
		nl = &n1;
	}
	if(!nr->addable) {
		tempname(&tmp, nr->type);
		cgen(nr, &tmp);
		nr = &tmp;
	}
	regalloc(&n2, nr->type, N);
	gmove(nr, &n2);
	nr = &n2;

	if(nl->op != OREGISTER) {
		regalloc(&n3, nl->type, N);
		gmove(nl, &n3);
		nl = &n3;
	}

	if(a == OGE || a == OGT) {
		// only < and <= work right with NaN; reverse if needed
		r = nr;
		nr = nl;
		nl = r;
		a = brrev(a);
	}

	gins(foptoas(OCMP, nr->type, 0), nl, nr);
	if(nl->op == OREGISTER)
		regfree(nl);
	regfree(nr);

ret:
	if(a == OEQ) {
		// neither NE nor P
		p1 = gbranch(AJNE, T, -likely);
		p2 = gbranch(AJPS, T, -likely);
		patch(gbranch(AJMP, T, 0), to);
		patch(p1, pc);
		patch(p2, pc);
	} else if(a == ONE) {
		// either NE or P
		patch(gbranch(AJNE, T, likely), to);
		patch(gbranch(AJPS, T, likely), to);
	} else
		patch(gbranch(optoas(a, nr->type), T, likely), to);

}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
void
expandchecks(Prog *firstp)
{
	Prog *p, *p1, *p2;

	for(p = firstp; p != P; p = p->link) {
		if(p->as != ACHECKNIL)
			continue;
		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
			warnl(p->lineno, "nil check %D", &p->from);
		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = mal(sizeof *p1);
		p2 = mal(sizeof *p2);
		clearp(p1);
		clearp(p2);
		p1->link = p2;
		p2->link = p->link;
		p->link = p1;
		p1->lineno = p->lineno;
		p2->lineno = p->lineno;
		p1->loc = 9999;
		p2->loc = 9999;
		p->as = ACMPL;
		p->to.type = D_CONST;
		p->to.offset = 0;
		p1->as = AJNE;
		p1->from.type = D_CONST;
		p1->from.offset = 1;	// likely
		p1->to.type = D_BRANCH;
		p1->to.u.branch = p2->link;
		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2->as = AMOVL;
		p2->from.type = D_AX;
		if(regtyp(&p->from))
			p2->to.type = p->from.type + D_INDIR;
		else
			p2->to.type = D_INDIR+D_NONE;
		p2->to.offset = 0;
	}
}