github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/8g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 void 13 defframe(Prog *ptxt) 14 { 15 // fill in argument size 16 ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); 17 18 // fill in final stack size 19 if(stksize > maxstksize) 20 maxstksize = stksize; 21 ptxt->to.offset = rnd(maxstksize+maxarg, widthptr); 22 maxstksize = 0; 23 } 24 25 // Sweep the prog list to mark any used nodes. 26 void 27 markautoused(Prog* p) 28 { 29 for (; p; p = p->link) { 30 if (p->as == ATYPE) 31 continue; 32 33 if (p->from.type == D_AUTO && p->from.node) 34 p->from.node->used = 1; 35 36 if (p->to.type == D_AUTO && p->to.node) 37 p->to.node->used = 1; 38 } 39 } 40 41 // Fixup instructions after compactframe has moved all autos around. 42 void 43 fixautoused(Prog* p) 44 { 45 Prog **lp; 46 47 for (lp=&p; (p=*lp) != P; ) { 48 if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { 49 *lp = p->link; 50 continue; 51 } 52 53 if (p->from.type == D_AUTO && p->from.node) 54 p->from.offset += p->from.node->stkdelta; 55 56 if (p->to.type == D_AUTO && p->to.node) 57 p->to.offset += p->to.node->stkdelta; 58 59 lp = &p->link; 60 } 61 } 62 63 void 64 clearfat(Node *nl) 65 { 66 uint32 w, c, q; 67 Node n1; 68 69 /* clear a fat object */ 70 if(debug['g']) 71 dump("\nclearfat", nl); 72 73 w = nl->type->width; 74 // Avoid taking the address for simple enough types. 75 if(componentgen(N, nl)) 76 return; 77 78 c = w % 4; // bytes 79 q = w / 4; // quads 80 81 gconreg(AMOVL, 0, D_AX); 82 nodreg(&n1, types[tptr], D_DI); 83 agen(nl, &n1); 84 85 if(q >= 4) { 86 gconreg(AMOVL, q, D_CX); 87 gins(AREP, N, N); // repeat 88 gins(ASTOSL, N, N); // STOL AL,*(DI)+ 89 } else 90 while(q > 0) { 91 gins(ASTOSL, N, N); // STOL AL,*(DI)+ 92 q--; 93 } 94 95 if(c >= 4) { 96 gconreg(AMOVL, c, D_CX); 97 gins(AREP, N, N); // repeat 98 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 99 } else 100 while(c > 0) { 101 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 102 c--; 103 } 104 } 105 106 /* 107 * generate: 108 * call f 109 * proc=-1 normal call but no return 110 * proc=0 normal call 111 * proc=1 goroutine run in new proc 112 * proc=2 defer call save away stack 113 * proc=3 normal call to C pointer (not Go func value) 114 */ 115 void 116 ginscall(Node *f, int proc) 117 { 118 Prog *p; 119 Node reg, r1, con; 120 121 switch(proc) { 122 default: 123 fatal("ginscall: bad proc %d", proc); 124 break; 125 126 case 0: // normal call 127 case -1: // normal call but no return 128 if(f->op == ONAME && f->class == PFUNC) { 129 p = gins(ACALL, N, f); 130 afunclit(&p->to, f); 131 if(proc == -1 || noreturn(p)) 132 gins(AUNDEF, N, N); 133 break; 134 } 135 nodreg(®, types[tptr], D_DX); 136 nodreg(&r1, types[tptr], D_BX); 137 gmove(f, ®); 138 reg.op = OINDREG; 139 gmove(®, &r1); 140 reg.op = OREGISTER; 141 gins(ACALL, ®, &r1); 142 break; 143 144 case 3: // normal call of c function pointer 145 gins(ACALL, N, f); 146 break; 147 148 case 1: // call in new proc (go) 149 case 2: // deferred call (defer) 150 nodreg(®, types[TINT32], D_CX); 151 gins(APUSHL, f, N); 152 nodconst(&con, types[TINT32], argsize(f->type)); 153 gins(APUSHL, &con, N); 154 if(proc == 1) 155 ginscall(newproc, 0); 156 else 157 ginscall(deferproc, 0); 158 gins(APOPL, N, ®); 159 gins(APOPL, N, ®); 160 if(proc == 2) { 161 nodreg(®, types[TINT64], D_AX); 162 gins(ATESTL, ®, ®); 163 patch(gbranch(AJNE, T, -1), retpc); 164 } 165 break; 166 } 167 } 168 169 /* 170 * n is call to interface method. 171 * generate res = n. 172 */ 173 void 174 cgen_callinter(Node *n, Node *res, int proc) 175 { 176 Node *i, *f; 177 Node tmpi, nodi, nodo, nodr, nodsp; 178 179 i = n->left; 180 if(i->op != ODOTINTER) 181 fatal("cgen_callinter: not ODOTINTER %O", i->op); 182 183 f = i->right; // field 184 if(f->op != ONAME) 185 fatal("cgen_callinter: not ONAME %O", f->op); 186 187 i = i->left; // interface 188 189 if(!i->addable) { 190 tempname(&tmpi, i->type); 191 cgen(i, &tmpi); 192 i = &tmpi; 193 } 194 195 genlist(n->list); // assign the args 196 197 // i is now addable, prepare an indirected 198 // register to hold its address. 199 igen(i, &nodi, res); // REG = &inter 200 201 nodindreg(&nodsp, types[tptr], D_SP); 202 nodi.type = types[tptr]; 203 nodi.xoffset += widthptr; 204 cgen(&nodi, &nodsp); // 0(SP) = 4(REG) -- i.data 205 206 regalloc(&nodo, types[tptr], res); 207 nodi.type = types[tptr]; 208 nodi.xoffset -= widthptr; 209 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 210 regfree(&nodi); 211 212 regalloc(&nodr, types[tptr], &nodo); 213 if(n->left->xoffset == BADWIDTH) 214 fatal("cgen_callinter: badwidth"); 215 nodo.op = OINDREG; 216 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 217 218 if(proc == 0) { 219 // plain call: use direct c function pointer - more efficient 220 cgen(&nodo, &nodr); // REG = 20+offset(REG) -- i.tab->fun[f] 221 proc = 3; 222 } else { 223 // go/defer. generate go func value. 224 gins(ALEAL, &nodo, &nodr); // REG = &(20+offset(REG)) -- i.tab->fun[f] 225 } 226 227 // BOTCH nodr.type = fntype; 228 nodr.type = n->left->type; 229 ginscall(&nodr, proc); 230 231 regfree(&nodr); 232 regfree(&nodo); 233 234 setmaxarg(n->left->type); 235 } 236 237 /* 238 * generate function call; 239 * proc=0 normal call 240 * proc=1 goroutine run in new proc 241 * proc=2 defer call save away stack 242 */ 243 void 244 cgen_call(Node *n, int proc) 245 { 246 Type *t; 247 Node nod, afun; 248 249 if(n == N) 250 return; 251 252 if(n->left->ullman >= UINF) { 253 // if name involves a fn call 254 // precompute the address of the fn 255 tempname(&afun, types[tptr]); 256 cgen(n->left, &afun); 257 } 258 259 genlist(n->list); // assign the args 260 t = n->left->type; 261 262 setmaxarg(t); 263 264 // call tempname pointer 265 if(n->left->ullman >= UINF) { 266 regalloc(&nod, types[tptr], N); 267 cgen_as(&nod, &afun); 268 nod.type = t; 269 ginscall(&nod, proc); 270 regfree(&nod); 271 return; 272 } 273 274 // call pointer 275 if(n->left->op != ONAME || n->left->class != PFUNC) { 276 regalloc(&nod, types[tptr], N); 277 cgen_as(&nod, n->left); 278 nod.type = t; 279 ginscall(&nod, proc); 280 regfree(&nod); 281 return; 282 } 283 284 // call direct 285 n->left->method = 1; 286 ginscall(n->left, proc); 287 } 288 289 /* 290 * call to n has already been generated. 291 * generate: 292 * res = return value from call. 293 */ 294 void 295 cgen_callret(Node *n, Node *res) 296 { 297 Node nod; 298 Type *fp, *t; 299 Iter flist; 300 301 t = n->left->type; 302 if(t->etype == TPTR32 || t->etype == TPTR64) 303 t = t->type; 304 305 fp = structfirst(&flist, getoutarg(t)); 306 if(fp == T) 307 fatal("cgen_callret: nil"); 308 309 memset(&nod, 0, sizeof(nod)); 310 nod.op = OINDREG; 311 nod.val.u.reg = D_SP; 312 nod.addable = 1; 313 314 nod.xoffset = fp->width; 315 nod.type = fp->type; 316 cgen_as(res, &nod); 317 } 318 319 /* 320 * call to n has already been generated. 321 * generate: 322 * res = &return value from call. 323 */ 324 void 325 cgen_aret(Node *n, Node *res) 326 { 327 Node nod1, nod2; 328 Type *fp, *t; 329 Iter flist; 330 331 t = n->left->type; 332 if(isptr[t->etype]) 333 t = t->type; 334 335 fp = structfirst(&flist, getoutarg(t)); 336 if(fp == T) 337 fatal("cgen_aret: nil"); 338 339 memset(&nod1, 0, sizeof(nod1)); 340 nod1.op = OINDREG; 341 nod1.val.u.reg = D_SP; 342 nod1.addable = 1; 343 344 nod1.xoffset = fp->width; 345 nod1.type = fp->type; 346 347 if(res->op != OREGISTER) { 348 regalloc(&nod2, types[tptr], res); 349 gins(ALEAL, &nod1, &nod2); 350 gins(AMOVL, &nod2, res); 351 regfree(&nod2); 352 } else 353 gins(ALEAL, &nod1, res); 354 } 355 356 /* 357 * generate return. 358 * n->left is assignments to return values. 359 */ 360 void 361 cgen_ret(Node *n) 362 { 363 genlist(n->list); // copy out args 364 if(retpc) 365 gjmp(retpc); 366 else 367 gins(ARET, N, N); 368 } 369 370 /* 371 * generate += *= etc. 372 */ 373 void 374 cgen_asop(Node *n) 375 { 376 Node n1, n2, n3, n4; 377 Node *nl, *nr; 378 Prog *p1; 379 Addr addr; 380 int a; 381 382 nl = n->left; 383 nr = n->right; 384 385 if(nr->ullman >= UINF && nl->ullman >= UINF) { 386 tempname(&n1, nr->type); 387 cgen(nr, &n1); 388 n2 = *n; 389 n2.right = &n1; 390 cgen_asop(&n2); 391 goto ret; 392 } 393 394 if(!isint[nl->type->etype]) 395 goto hard; 396 if(!isint[nr->type->etype]) 397 goto hard; 398 if(is64(nl->type) || is64(nr->type)) 399 goto hard; 400 401 switch(n->etype) { 402 case OADD: 403 if(smallintconst(nr)) 404 if(mpgetfix(nr->val.u.xval) == 1) { 405 a = optoas(OINC, nl->type); 406 if(nl->addable) { 407 gins(a, N, nl); 408 goto ret; 409 } 410 if(sudoaddable(a, nl, &addr)) { 411 p1 = gins(a, N, N); 412 p1->to = addr; 413 sudoclean(); 414 goto ret; 415 } 416 } 417 break; 418 419 case OSUB: 420 if(smallintconst(nr)) 421 if(mpgetfix(nr->val.u.xval) == 1) { 422 a = optoas(ODEC, nl->type); 423 if(nl->addable) { 424 gins(a, N, nl); 425 goto ret; 426 } 427 if(sudoaddable(a, nl, &addr)) { 428 p1 = gins(a, N, N); 429 p1->to = addr; 430 sudoclean(); 431 goto ret; 432 } 433 } 434 break; 435 } 436 437 switch(n->etype) { 438 case OADD: 439 case OSUB: 440 case OXOR: 441 case OAND: 442 case OOR: 443 a = optoas(n->etype, nl->type); 444 if(nl->addable) { 445 if(smallintconst(nr)) { 446 gins(a, nr, nl); 447 goto ret; 448 } 449 regalloc(&n2, nr->type, N); 450 cgen(nr, &n2); 451 gins(a, &n2, nl); 452 regfree(&n2); 453 goto ret; 454 } 455 if(nr->ullman < UINF) 456 if(sudoaddable(a, nl, &addr)) { 457 if(smallintconst(nr)) { 458 p1 = gins(a, nr, N); 459 p1->to = addr; 460 sudoclean(); 461 goto ret; 462 } 463 regalloc(&n2, nr->type, N); 464 cgen(nr, &n2); 465 p1 = gins(a, &n2, N); 466 p1->to = addr; 467 regfree(&n2); 468 sudoclean(); 469 goto ret; 470 } 471 } 472 473 hard: 474 n2.op = 0; 475 n1.op = 0; 476 if(nr->ullman >= nl->ullman || nl->addable) { 477 mgen(nr, &n2, N); 478 nr = &n2; 479 } else { 480 tempname(&n2, nr->type); 481 cgen(nr, &n2); 482 nr = &n2; 483 } 484 if(!nl->addable) { 485 igen(nl, &n1, N); 486 nl = &n1; 487 } 488 489 n3 = *n; 490 n3.left = nl; 491 n3.right = nr; 492 n3.op = n->etype; 493 494 mgen(&n3, &n4, N); 495 gmove(&n4, nl); 496 497 if(n1.op) 498 regfree(&n1); 499 mfree(&n2); 500 mfree(&n4); 501 502 ret: 503 ; 504 } 505 506 int 507 samereg(Node *a, Node *b) 508 { 509 if(a->op != OREGISTER) 510 return 0; 511 if(b->op != OREGISTER) 512 return 0; 513 if(a->val.u.reg != b->val.u.reg) 514 return 0; 515 return 1; 516 } 517 518 /* 519 * generate division. 520 * caller must set: 521 * ax = allocated AX register 522 * dx = allocated DX register 523 * generates one of: 524 * res = nl / nr 525 * res = nl % nr 526 * according to op. 527 */ 528 void 529 dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx) 530 { 531 int check; 532 Node n1, t1, t2, t3, t4, n4, nz; 533 Type *t, *t0; 534 Prog *p1, *p2; 535 536 // Have to be careful about handling 537 // most negative int divided by -1 correctly. 538 // The hardware will trap. 539 // Also the byte divide instruction needs AH, 540 // which we otherwise don't have to deal with. 541 // Easiest way to avoid for int8, int16: use int32. 542 // For int32 and int64, use explicit test. 543 // Could use int64 hw for int32. 544 t = nl->type; 545 t0 = t; 546 check = 0; 547 if(issigned[t->etype]) { 548 check = 1; 549 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1)) 550 check = 0; 551 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 552 check = 0; 553 } 554 if(t->width < 4) { 555 if(issigned[t->etype]) 556 t = types[TINT32]; 557 else 558 t = types[TUINT32]; 559 check = 0; 560 } 561 562 tempname(&t1, t); 563 tempname(&t2, t); 564 if(t0 != t) { 565 tempname(&t3, t0); 566 tempname(&t4, t0); 567 cgen(nl, &t3); 568 cgen(nr, &t4); 569 // Convert. 570 gmove(&t3, &t1); 571 gmove(&t4, &t2); 572 } else { 573 cgen(nl, &t1); 574 cgen(nr, &t2); 575 } 576 577 if(!samereg(ax, res) && !samereg(dx, res)) 578 regalloc(&n1, t, res); 579 else 580 regalloc(&n1, t, N); 581 gmove(&t2, &n1); 582 gmove(&t1, ax); 583 p2 = P; 584 if(check) { 585 nodconst(&n4, t, -1); 586 gins(optoas(OCMP, t), &n1, &n4); 587 p1 = gbranch(optoas(ONE, t), T, +1); 588 if(op == ODIV) { 589 // a / (-1) is -a. 590 gins(optoas(OMINUS, t), N, ax); 591 gmove(ax, res); 592 } else { 593 // a % (-1) is 0. 594 nodconst(&n4, t, 0); 595 gmove(&n4, res); 596 } 597 p2 = gbranch(AJMP, T, 0); 598 patch(p1, pc); 599 } 600 if(!issigned[t->etype]) { 601 nodconst(&nz, t, 0); 602 gmove(&nz, dx); 603 } else 604 gins(optoas(OEXTEND, t), N, N); 605 gins(optoas(op, t), &n1, N); 606 regfree(&n1); 607 608 if(op == ODIV) 609 gmove(ax, res); 610 else 611 gmove(dx, res); 612 if(check) 613 patch(p2, pc); 614 } 615 616 static void 617 savex(int dr, Node *x, Node *oldx, Node *res, Type *t) 618 { 619 int r; 620 621 r = reg[dr]; 622 nodreg(x, types[TINT32], dr); 623 624 // save current ax and dx if they are live 625 // and not the destination 626 memset(oldx, 0, sizeof *oldx); 627 if(r > 0 && !samereg(x, res)) { 628 tempname(oldx, types[TINT32]); 629 gmove(x, oldx); 630 } 631 632 regalloc(x, t, x); 633 } 634 635 static void 636 restx(Node *x, Node *oldx) 637 { 638 regfree(x); 639 640 if(oldx->op != 0) { 641 x->type = types[TINT32]; 642 gmove(oldx, x); 643 } 644 } 645 646 /* 647 * generate division according to op, one of: 648 * res = nl / nr 649 * res = nl % nr 650 */ 651 void 652 cgen_div(int op, Node *nl, Node *nr, Node *res) 653 { 654 Node ax, dx, oldax, olddx; 655 Type *t; 656 657 if(is64(nl->type)) 658 fatal("cgen_div %T", nl->type); 659 660 if(issigned[nl->type->etype]) 661 t = types[TINT32]; 662 else 663 t = types[TUINT32]; 664 savex(D_AX, &ax, &oldax, res, t); 665 savex(D_DX, &dx, &olddx, res, t); 666 dodiv(op, nl, nr, res, &ax, &dx); 667 restx(&dx, &olddx); 668 restx(&ax, &oldax); 669 } 670 671 /* 672 * generate shift according to op, one of: 673 * res = nl << nr 674 * res = nl >> nr 675 */ 676 void 677 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 678 { 679 Node n1, n2, nt, cx, oldcx, hi, lo; 680 int a, w; 681 Prog *p1, *p2; 682 uvlong sc; 683 684 if(nl->type->width > 4) 685 fatal("cgen_shift %T", nl->type); 686 687 w = nl->type->width * 8; 688 689 a = optoas(op, nl->type); 690 691 if(nr->op == OLITERAL) { 692 tempname(&n2, nl->type); 693 cgen(nl, &n2); 694 regalloc(&n1, nl->type, res); 695 gmove(&n2, &n1); 696 sc = mpgetfix(nr->val.u.xval); 697 if(sc >= nl->type->width*8) { 698 // large shift gets 2 shifts by width-1 699 gins(a, ncon(w-1), &n1); 700 gins(a, ncon(w-1), &n1); 701 } else 702 gins(a, nr, &n1); 703 gmove(&n1, res); 704 regfree(&n1); 705 return; 706 } 707 708 memset(&oldcx, 0, sizeof oldcx); 709 nodreg(&cx, types[TUINT32], D_CX); 710 if(reg[D_CX] > 1 && !samereg(&cx, res)) { 711 tempname(&oldcx, types[TUINT32]); 712 gmove(&cx, &oldcx); 713 } 714 715 if(nr->type->width > 4) { 716 tempname(&nt, nr->type); 717 n1 = nt; 718 } else { 719 nodreg(&n1, types[TUINT32], D_CX); 720 regalloc(&n1, nr->type, &n1); // to hold the shift type in CX 721 } 722 723 if(samereg(&cx, res)) 724 regalloc(&n2, nl->type, N); 725 else 726 regalloc(&n2, nl->type, res); 727 if(nl->ullman >= nr->ullman) { 728 cgen(nl, &n2); 729 cgen(nr, &n1); 730 } else { 731 cgen(nr, &n1); 732 cgen(nl, &n2); 733 } 734 735 // test and fix up large shifts 736 if(bounded) { 737 if(nr->type->width > 4) { 738 // delayed reg alloc 739 nodreg(&n1, types[TUINT32], D_CX); 740 regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX 741 split64(&nt, &lo, &hi); 742 gmove(&lo, &n1); 743 splitclean(); 744 } 745 } else { 746 if(nr->type->width > 4) { 747 // delayed reg alloc 748 nodreg(&n1, types[TUINT32], D_CX); 749 regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX 750 split64(&nt, &lo, &hi); 751 gmove(&lo, &n1); 752 gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0)); 753 p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1); 754 gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w)); 755 p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); 756 splitclean(); 757 patch(p2, pc); 758 } else { 759 gins(optoas(OCMP, nr->type), &n1, ncon(w)); 760 p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); 761 } 762 if(op == ORSH && issigned[nl->type->etype]) { 763 gins(a, ncon(w-1), &n2); 764 } else { 765 gmove(ncon(0), &n2); 766 } 767 patch(p1, pc); 768 } 769 gins(a, &n1, &n2); 770 771 if(oldcx.op != 0) 772 gmove(&oldcx, &cx); 773 774 gmove(&n2, res); 775 776 regfree(&n1); 777 regfree(&n2); 778 } 779 780 /* 781 * generate byte multiply: 782 * res = nl * nr 783 * there is no 2-operand byte multiply instruction so 784 * we do a full-width multiplication and truncate afterwards. 785 */ 786 void 787 cgen_bmul(int op, Node *nl, Node *nr, Node *res) 788 { 789 Node n1, n2, nt, *tmp; 790 Type *t; 791 int a; 792 793 // copy from byte to full registers 794 t = types[TUINT32]; 795 if(issigned[nl->type->etype]) 796 t = types[TINT32]; 797 798 // largest ullman on left. 799 if(nl->ullman < nr->ullman) { 800 tmp = nl; 801 nl = nr; 802 nr = tmp; 803 } 804 805 tempname(&nt, nl->type); 806 cgen(nl, &nt); 807 regalloc(&n1, t, res); 808 cgen(nr, &n1); 809 regalloc(&n2, t, N); 810 gmove(&nt, &n2); 811 a = optoas(op, t); 812 gins(a, &n2, &n1); 813 regfree(&n2); 814 gmove(&n1, res); 815 regfree(&n1); 816 } 817 818 /* 819 * generate high multiply: 820 * res = (nl*nr) >> width 821 */ 822 void 823 cgen_hmul(Node *nl, Node *nr, Node *res) 824 { 825 Type *t; 826 int a; 827 Node n1, n2, ax, dx; 828 829 t = nl->type; 830 a = optoas(OHMUL, t); 831 // gen nl in n1. 832 tempname(&n1, t); 833 cgen(nl, &n1); 834 // gen nr in n2. 835 regalloc(&n2, t, res); 836 cgen(nr, &n2); 837 838 // multiply. 839 nodreg(&ax, t, D_AX); 840 gmove(&n2, &ax); 841 gins(a, &n1, N); 842 regfree(&n2); 843 844 if(t->width == 1) { 845 // byte multiply behaves differently. 846 nodreg(&ax, t, D_AH); 847 nodreg(&dx, t, D_DL); 848 gmove(&ax, &dx); 849 } 850 nodreg(&dx, t, D_DX); 851 gmove(&dx, res); 852 } 853 854 static void cgen_float387(Node *n, Node *res); 855 static void cgen_floatsse(Node *n, Node *res); 856 857 /* 858 * generate floating-point operation. 859 */ 860 void 861 cgen_float(Node *n, Node *res) 862 { 863 Node *nl; 864 Node n1, n2; 865 Prog *p1, *p2, *p3; 866 867 nl = n->left; 868 switch(n->op) { 869 case OEQ: 870 case ONE: 871 case OLT: 872 case OLE: 873 case OGE: 874 p1 = gbranch(AJMP, T, 0); 875 p2 = pc; 876 gmove(nodbool(1), res); 877 p3 = gbranch(AJMP, T, 0); 878 patch(p1, pc); 879 bgen(n, 1, 0, p2); 880 gmove(nodbool(0), res); 881 patch(p3, pc); 882 return; 883 884 case OPLUS: 885 cgen(nl, res); 886 return; 887 888 case OCONV: 889 if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) { 890 cgen(nl, res); 891 return; 892 } 893 894 tempname(&n2, n->type); 895 mgen(nl, &n1, res); 896 gmove(&n1, &n2); 897 gmove(&n2, res); 898 mfree(&n1); 899 return; 900 } 901 902 if(use_sse) 903 cgen_floatsse(n, res); 904 else 905 cgen_float387(n, res); 906 } 907 908 // floating-point. 387 (not SSE2) 909 static void 910 cgen_float387(Node *n, Node *res) 911 { 912 Node f0, f1; 913 Node *nl, *nr; 914 915 nl = n->left; 916 nr = n->right; 917 nodreg(&f0, nl->type, D_F0); 918 nodreg(&f1, n->type, D_F0+1); 919 if(nr != N) 920 goto flt2; 921 922 // unary 923 cgen(nl, &f0); 924 if(n->op != OCONV && n->op != OPLUS) 925 gins(foptoas(n->op, n->type, 0), N, N); 926 gmove(&f0, res); 927 return; 928 929 flt2: // binary 930 if(nl->ullman >= nr->ullman) { 931 cgen(nl, &f0); 932 if(nr->addable) 933 gins(foptoas(n->op, n->type, 0), nr, &f0); 934 else { 935 cgen(nr, &f0); 936 gins(foptoas(n->op, n->type, Fpop), &f0, &f1); 937 } 938 } else { 939 cgen(nr, &f0); 940 if(nl->addable) 941 gins(foptoas(n->op, n->type, Frev), nl, &f0); 942 else { 943 cgen(nl, &f0); 944 gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1); 945 } 946 } 947 gmove(&f0, res); 948 return; 949 950 } 951 952 static void 953 cgen_floatsse(Node *n, Node *res) 954 { 955 Node *nl, *nr, *r; 956 Node n1, n2, nt; 957 int a; 958 959 nl = n->left; 960 nr = n->right; 961 switch(n->op) { 962 default: 963 dump("cgen_floatsse", n); 964 fatal("cgen_floatsse %O", n->op); 965 return; 966 967 case OMINUS: 968 case OCOM: 969 nr = nodintconst(-1); 970 convlit(&nr, n->type); 971 a = foptoas(OMUL, nl->type, 0); 972 goto sbop; 973 974 // symmetric binary 975 case OADD: 976 case OMUL: 977 a = foptoas(n->op, nl->type, 0); 978 goto sbop; 979 980 // asymmetric binary 981 case OSUB: 982 case OMOD: 983 case ODIV: 984 a = foptoas(n->op, nl->type, 0); 985 goto abop; 986 } 987 988 sbop: // symmetric binary 989 if(nl->ullman < nr->ullman || nl->op == OLITERAL) { 990 r = nl; 991 nl = nr; 992 nr = r; 993 } 994 995 abop: // asymmetric binary 996 if(nl->ullman >= nr->ullman) { 997 tempname(&nt, nl->type); 998 cgen(nl, &nt); 999 mgen(nr, &n2, N); 1000 regalloc(&n1, nl->type, res); 1001 gmove(&nt, &n1); 1002 gins(a, &n2, &n1); 1003 gmove(&n1, res); 1004 regfree(&n1); 1005 mfree(&n2); 1006 } else { 1007 regalloc(&n2, nr->type, res); 1008 cgen(nr, &n2); 1009 regalloc(&n1, nl->type, N); 1010 cgen(nl, &n1); 1011 gins(a, &n2, &n1); 1012 regfree(&n2); 1013 gmove(&n1, res); 1014 regfree(&n1); 1015 } 1016 return; 1017 } 1018 1019 void 1020 bgen_float(Node *n, int true, int likely, Prog *to) 1021 { 1022 int et, a; 1023 Node *nl, *nr, *r; 1024 Node n1, n2, n3, tmp, t1, t2, ax; 1025 Prog *p1, *p2; 1026 1027 nl = n->left; 1028 nr = n->right; 1029 a = n->op; 1030 if(!true) { 1031 // brcom is not valid on floats when NaN is involved. 1032 p1 = gbranch(AJMP, T, 0); 1033 p2 = gbranch(AJMP, T, 0); 1034 patch(p1, pc); 1035 // No need to avoid re-genning ninit. 1036 bgen_float(n, 1, -likely, p2); 1037 patch(gbranch(AJMP, T, 0), to); 1038 patch(p2, pc); 1039 return; 1040 } 1041 1042 if(use_sse) 1043 goto sse; 1044 else 1045 goto x87; 1046 1047 x87: 1048 a = brrev(a); // because the args are stacked 1049 if(a == OGE || a == OGT) { 1050 // only < and <= work right with NaN; reverse if needed 1051 r = nr; 1052 nr = nl; 1053 nl = r; 1054 a = brrev(a); 1055 } 1056 1057 nodreg(&tmp, nr->type, D_F0); 1058 nodreg(&n2, nr->type, D_F0 + 1); 1059 nodreg(&ax, types[TUINT16], D_AX); 1060 et = simsimtype(nr->type); 1061 if(et == TFLOAT64) { 1062 if(nl->ullman > nr->ullman) { 1063 cgen(nl, &tmp); 1064 cgen(nr, &tmp); 1065 gins(AFXCHD, &tmp, &n2); 1066 } else { 1067 cgen(nr, &tmp); 1068 cgen(nl, &tmp); 1069 } 1070 gins(AFUCOMIP, &tmp, &n2); 1071 gins(AFMOVDP, &tmp, &tmp); // annoying pop but still better than STSW+SAHF 1072 } else { 1073 // TODO(rsc): The moves back and forth to memory 1074 // here are for truncating the value to 32 bits. 1075 // This handles 32-bit comparison but presumably 1076 // all the other ops have the same problem. 1077 // We need to figure out what the right general 1078 // solution is, besides telling people to use float64. 1079 tempname(&t1, types[TFLOAT32]); 1080 tempname(&t2, types[TFLOAT32]); 1081 cgen(nr, &t1); 1082 cgen(nl, &t2); 1083 gmove(&t2, &tmp); 1084 gins(AFCOMFP, &t1, &tmp); 1085 gins(AFSTSW, N, &ax); 1086 gins(ASAHF, N, N); 1087 } 1088 1089 goto ret; 1090 1091 sse: 1092 if(!nl->addable) { 1093 tempname(&n1, nl->type); 1094 cgen(nl, &n1); 1095 nl = &n1; 1096 } 1097 if(!nr->addable) { 1098 tempname(&tmp, nr->type); 1099 cgen(nr, &tmp); 1100 nr = &tmp; 1101 } 1102 regalloc(&n2, nr->type, N); 1103 gmove(nr, &n2); 1104 nr = &n2; 1105 1106 if(nl->op != OREGISTER) { 1107 regalloc(&n3, nl->type, N); 1108 gmove(nl, &n3); 1109 nl = &n3; 1110 } 1111 1112 if(a == OGE || a == OGT) { 1113 // only < and <= work right with NaN; reverse if needed 1114 r = nr; 1115 nr = nl; 1116 nl = r; 1117 a = brrev(a); 1118 } 1119 1120 gins(foptoas(OCMP, nr->type, 0), nl, nr); 1121 if(nl->op == OREGISTER) 1122 regfree(nl); 1123 regfree(nr); 1124 1125 ret: 1126 if(a == OEQ) { 1127 // neither NE nor P 1128 p1 = gbranch(AJNE, T, -likely); 1129 p2 = gbranch(AJPS, T, -likely); 1130 patch(gbranch(AJMP, T, 0), to); 1131 patch(p1, pc); 1132 patch(p2, pc); 1133 } else if(a == ONE) { 1134 // either NE or P 1135 patch(gbranch(AJNE, T, likely), to); 1136 patch(gbranch(AJPS, T, likely), to); 1137 } else 1138 patch(gbranch(optoas(a, nr->type), T, likely), to); 1139 1140 }