github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/cmd/8g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 static Prog *appendpp(Prog*, int, int, vlong, int, vlong); 13 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); 14 15 void 16 defframe(Prog *ptxt) 17 { 18 uint32 frame, ax; 19 Prog *p; 20 vlong lo, hi; 21 NodeList *l; 22 Node *n; 23 24 // fill in argument size 25 ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); 26 27 // fill in final stack size 28 frame = rnd(stksize+maxarg, widthptr); 29 ptxt->to.offset = frame; 30 31 // insert code to zero ambiguously live variables 32 // so that the garbage collector only sees initialized values 33 // when it looks for pointers. 34 p = ptxt; 35 hi = 0; 36 lo = hi; 37 ax = 0; 38 for(l=curfn->dcl; l != nil; l = l->next) { 39 n = l->n; 40 if(!n->needzero) 41 continue; 42 if(n->class != PAUTO) 43 fatal("needzero class %d", n->class); 44 if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) 45 fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); 46 if(lo != hi && n->xoffset + n->type->width == lo - 2*widthptr) { 47 // merge with range we already have 48 lo = n->xoffset; 49 continue; 50 } 51 // zero old range 52 p = zerorange(p, frame, lo, hi, &ax); 53 54 // set new range 55 hi = n->xoffset + n->type->width; 56 lo = n->xoffset; 57 } 58 // zero final range 59 zerorange(p, frame, lo, hi, &ax); 60 } 61 62 static Prog* 63 zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax) 64 { 65 vlong cnt, i; 66 67 cnt = hi - lo; 68 if(cnt == 0) 69 return p; 70 if(*ax == 0) { 71 p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0); 72 *ax = 1; 73 } 74 if(cnt <= 4*widthreg) { 75 for(i = 0; i < cnt; i += widthreg) { 76 p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo+i); 77 } 78 } else if(!nacl && cnt <= 128*widthreg) { 79 p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0); 80 p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 1*(128-cnt/widthreg)); 81 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 82 } else { 83 p = appendpp(p, AMOVL, D_CONST, cnt/widthreg, D_CX, 0); 84 p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0); 85 p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0); 86 p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0); 87 } 88 return p; 89 } 90 91 static Prog* 92 appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset) 93 { 94 Prog *q; 95 q = mal(sizeof(*q)); 96 clearp(q); 97 q->as = as; 98 q->lineno = p->lineno; 99 q->from.type = ftype; 100 q->from.offset = foffset; 101 q->to.type = ttype; 102 q->to.offset = toffset; 103 q->link = p->link; 104 p->link = q; 105 return q; 106 } 107 108 // Sweep the prog list to mark any used nodes. 109 void 110 markautoused(Prog* p) 111 { 112 for (; p; p = p->link) { 113 if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL) 114 continue; 115 116 if (p->from.node) 117 p->from.node->used = 1; 118 119 if (p->to.node) 120 p->to.node->used = 1; 121 } 122 } 123 124 // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 125 void 126 fixautoused(Prog* p) 127 { 128 Prog **lp; 129 130 for (lp=&p; (p=*lp) != P; ) { 131 if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { 132 *lp = p->link; 133 continue; 134 } 135 if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) { 136 // Cannot remove VARDEF instruction, because - unlike TYPE handled above - 137 // VARDEFs are interspersed with other code, and a jump might be using the 138 // VARDEF as a target. Replace with a no-op instead. A later pass will remove 139 // the no-ops. 140 p->to.type = D_NONE; 141 p->to.node = N; 142 p->as = ANOP; 143 continue; 144 } 145 146 if (p->from.type == D_AUTO && p->from.node) 147 p->from.offset += p->from.node->stkdelta; 148 149 if (p->to.type == D_AUTO && p->to.node) 150 p->to.offset += p->to.node->stkdelta; 151 152 lp = &p->link; 153 } 154 } 155 156 void 157 clearfat(Node *nl) 158 { 159 uint32 w, c, q; 160 Node n1, z; 161 Prog *p; 162 163 /* clear a fat object */ 164 if(debug['g']) 165 dump("\nclearfat", nl); 166 167 w = nl->type->width; 168 // Avoid taking the address for simple enough types. 169 if(componentgen(N, nl)) 170 return; 171 172 c = w % 4; // bytes 173 q = w / 4; // quads 174 175 if(q < 4) { 176 // Write sequence of MOV 0, off(base) instead of using STOSL. 177 // The hope is that although the code will be slightly longer, 178 // the MOVs will have no dependencies and pipeline better 179 // than the unrolled STOSL loop. 180 // NOTE: Must use agen, not igen, so that optimizer sees address 181 // being taken. We are not writing on field boundaries. 182 regalloc(&n1, types[tptr], N); 183 agen(nl, &n1); 184 n1.op = OINDREG; 185 nodconst(&z, types[TUINT64], 0); 186 while(q-- > 0) { 187 n1.type = z.type; 188 gins(AMOVL, &z, &n1); 189 n1.xoffset += 4; 190 } 191 nodconst(&z, types[TUINT8], 0); 192 while(c-- > 0) { 193 n1.type = z.type; 194 gins(AMOVB, &z, &n1); 195 n1.xoffset++; 196 } 197 regfree(&n1); 198 return; 199 } 200 201 nodreg(&n1, types[tptr], D_DI); 202 agen(nl, &n1); 203 gconreg(AMOVL, 0, D_AX); 204 205 if(q > 128 || (q >= 4 && nacl)) { 206 gconreg(AMOVL, q, D_CX); 207 gins(AREP, N, N); // repeat 208 gins(ASTOSL, N, N); // STOL AL,*(DI)+ 209 } else if(q >= 4) { 210 p = gins(ADUFFZERO, N, N); 211 p->to.type = D_ADDR; 212 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 213 // 1 and 128 = magic constants: see ../../runtime/asm_386.s 214 p->to.offset = 1*(128-q); 215 } else 216 while(q > 0) { 217 gins(ASTOSL, N, N); // STOL AL,*(DI)+ 218 q--; 219 } 220 221 while(c > 0) { 222 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 223 c--; 224 } 225 } 226 227 /* 228 * generate: 229 * call f 230 * proc=-1 normal call but no return 231 * proc=0 normal call 232 * proc=1 goroutine run in new proc 233 * proc=2 defer call save away stack 234 * proc=3 normal call to C pointer (not Go func value) 235 */ 236 void 237 ginscall(Node *f, int proc) 238 { 239 Prog *p; 240 Node reg, r1, con, stk; 241 int32 extra; 242 243 if(f->type != T) { 244 extra = 0; 245 if(proc == 1 || proc == 2) 246 extra = 2 * widthptr; 247 setmaxarg(f->type, extra); 248 } 249 250 switch(proc) { 251 default: 252 fatal("ginscall: bad proc %d", proc); 253 break; 254 255 case 0: // normal call 256 case -1: // normal call but no return 257 if(f->op == ONAME && f->class == PFUNC) { 258 if(f == deferreturn) { 259 // Deferred calls will appear to be returning to 260 // the CALL deferreturn(SB) that we are about to emit. 261 // However, the stack trace code will show the line 262 // of the instruction byte before the return PC. 263 // To avoid that being an unrelated instruction, 264 // insert an x86 NOP that we will have the right line number. 265 // x86 NOP 0x90 is really XCHG AX, AX; use that description 266 // because the NOP pseudo-instruction will be removed by 267 // the linker. 268 nodreg(®, types[TINT], D_AX); 269 gins(AXCHGL, ®, ®); 270 } 271 p = gins(ACALL, N, f); 272 afunclit(&p->to, f); 273 if(proc == -1 || noreturn(p)) 274 gins(AUNDEF, N, N); 275 break; 276 } 277 nodreg(®, types[tptr], D_DX); 278 nodreg(&r1, types[tptr], D_BX); 279 gmove(f, ®); 280 reg.op = OINDREG; 281 gmove(®, &r1); 282 reg.op = OREGISTER; 283 gins(ACALL, ®, &r1); 284 break; 285 286 case 3: // normal call of c function pointer 287 gins(ACALL, N, f); 288 break; 289 290 case 1: // call in new proc (go) 291 case 2: // deferred call (defer) 292 memset(&stk, 0, sizeof(stk)); 293 stk.op = OINDREG; 294 stk.val.u.reg = D_SP; 295 stk.xoffset = 0; 296 297 // size of arguments at 0(SP) 298 nodconst(&con, types[TINT32], argsize(f->type)); 299 gins(AMOVL, &con, &stk); 300 301 // FuncVal* at 4(SP) 302 stk.xoffset = widthptr; 303 gins(AMOVL, f, &stk); 304 305 if(proc == 1) 306 ginscall(newproc, 0); 307 else 308 ginscall(deferproc, 0); 309 if(proc == 2) { 310 nodreg(®, types[TINT32], D_AX); 311 gins(ATESTL, ®, ®); 312 p = gbranch(AJEQ, T, +1); 313 cgen_ret(N); 314 patch(p, pc); 315 } 316 break; 317 } 318 } 319 320 /* 321 * n is call to interface method. 322 * generate res = n. 323 */ 324 void 325 cgen_callinter(Node *n, Node *res, int proc) 326 { 327 Node *i, *f; 328 Node tmpi, nodi, nodo, nodr, nodsp; 329 330 i = n->left; 331 if(i->op != ODOTINTER) 332 fatal("cgen_callinter: not ODOTINTER %O", i->op); 333 334 f = i->right; // field 335 if(f->op != ONAME) 336 fatal("cgen_callinter: not ONAME %O", f->op); 337 338 i = i->left; // interface 339 340 if(!i->addable) { 341 tempname(&tmpi, i->type); 342 cgen(i, &tmpi); 343 i = &tmpi; 344 } 345 346 genlist(n->list); // assign the args 347 348 // i is now addable, prepare an indirected 349 // register to hold its address. 350 igen(i, &nodi, res); // REG = &inter 351 352 nodindreg(&nodsp, types[tptr], D_SP); 353 nodsp.xoffset = 0; 354 if(proc != 0) 355 nodsp.xoffset += 2 * widthptr; // leave room for size & fn 356 nodi.type = types[tptr]; 357 nodi.xoffset += widthptr; 358 cgen(&nodi, &nodsp); // {0 or 8}(SP) = 4(REG) -- i.data 359 360 regalloc(&nodo, types[tptr], res); 361 nodi.type = types[tptr]; 362 nodi.xoffset -= widthptr; 363 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 364 regfree(&nodi); 365 366 regalloc(&nodr, types[tptr], &nodo); 367 if(n->left->xoffset == BADWIDTH) 368 fatal("cgen_callinter: badwidth"); 369 cgen_checknil(&nodo); 370 nodo.op = OINDREG; 371 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 372 373 if(proc == 0) { 374 // plain call: use direct c function pointer - more efficient 375 cgen(&nodo, &nodr); // REG = 20+offset(REG) -- i.tab->fun[f] 376 proc = 3; 377 } else { 378 // go/defer. generate go func value. 379 gins(ALEAL, &nodo, &nodr); // REG = &(20+offset(REG)) -- i.tab->fun[f] 380 } 381 382 nodr.type = n->left->type; 383 ginscall(&nodr, proc); 384 385 regfree(&nodr); 386 regfree(&nodo); 387 } 388 389 /* 390 * generate function call; 391 * proc=0 normal call 392 * proc=1 goroutine run in new proc 393 * proc=2 defer call save away stack 394 */ 395 void 396 cgen_call(Node *n, int proc) 397 { 398 Type *t; 399 Node nod, afun; 400 401 if(n == N) 402 return; 403 404 if(n->left->ullman >= UINF) { 405 // if name involves a fn call 406 // precompute the address of the fn 407 tempname(&afun, types[tptr]); 408 cgen(n->left, &afun); 409 } 410 411 genlist(n->list); // assign the args 412 t = n->left->type; 413 414 // call tempname pointer 415 if(n->left->ullman >= UINF) { 416 regalloc(&nod, types[tptr], N); 417 cgen_as(&nod, &afun); 418 nod.type = t; 419 ginscall(&nod, proc); 420 regfree(&nod); 421 return; 422 } 423 424 // call pointer 425 if(n->left->op != ONAME || n->left->class != PFUNC) { 426 regalloc(&nod, types[tptr], N); 427 cgen_as(&nod, n->left); 428 nod.type = t; 429 ginscall(&nod, proc); 430 regfree(&nod); 431 return; 432 } 433 434 // call direct 435 n->left->method = 1; 436 ginscall(n->left, proc); 437 } 438 439 /* 440 * call to n has already been generated. 441 * generate: 442 * res = return value from call. 443 */ 444 void 445 cgen_callret(Node *n, Node *res) 446 { 447 Node nod; 448 Type *fp, *t; 449 Iter flist; 450 451 t = n->left->type; 452 if(t->etype == TPTR32 || t->etype == TPTR64) 453 t = t->type; 454 455 fp = structfirst(&flist, getoutarg(t)); 456 if(fp == T) 457 fatal("cgen_callret: nil"); 458 459 memset(&nod, 0, sizeof(nod)); 460 nod.op = OINDREG; 461 nod.val.u.reg = D_SP; 462 nod.addable = 1; 463 464 nod.xoffset = fp->width; 465 nod.type = fp->type; 466 cgen_as(res, &nod); 467 } 468 469 /* 470 * call to n has already been generated. 471 * generate: 472 * res = &return value from call. 473 */ 474 void 475 cgen_aret(Node *n, Node *res) 476 { 477 Node nod1, nod2; 478 Type *fp, *t; 479 Iter flist; 480 481 t = n->left->type; 482 if(isptr[t->etype]) 483 t = t->type; 484 485 fp = structfirst(&flist, getoutarg(t)); 486 if(fp == T) 487 fatal("cgen_aret: nil"); 488 489 memset(&nod1, 0, sizeof(nod1)); 490 nod1.op = OINDREG; 491 nod1.val.u.reg = D_SP; 492 nod1.addable = 1; 493 494 nod1.xoffset = fp->width; 495 nod1.type = fp->type; 496 497 if(res->op != OREGISTER) { 498 regalloc(&nod2, types[tptr], res); 499 gins(ALEAL, &nod1, &nod2); 500 gins(AMOVL, &nod2, res); 501 regfree(&nod2); 502 } else 503 gins(ALEAL, &nod1, res); 504 } 505 506 /* 507 * generate return. 508 * n->left is assignments to return values. 509 */ 510 void 511 cgen_ret(Node *n) 512 { 513 Prog *p; 514 515 if(n != N) 516 genlist(n->list); // copy out args 517 if(hasdefer) 518 ginscall(deferreturn, 0); 519 genlist(curfn->exit); 520 p = gins(ARET, N, N); 521 if(n != N && n->op == ORETJMP) { 522 p->to.type = D_EXTERN; 523 p->to.sym = linksym(n->left->sym); 524 } 525 } 526 527 /* 528 * generate += *= etc. 529 */ 530 void 531 cgen_asop(Node *n) 532 { 533 Node n1, n2, n3, n4; 534 Node *nl, *nr; 535 Prog *p1; 536 Addr addr; 537 int a; 538 539 nl = n->left; 540 nr = n->right; 541 542 if(nr->ullman >= UINF && nl->ullman >= UINF) { 543 tempname(&n1, nr->type); 544 cgen(nr, &n1); 545 n2 = *n; 546 n2.right = &n1; 547 cgen_asop(&n2); 548 goto ret; 549 } 550 551 if(!isint[nl->type->etype]) 552 goto hard; 553 if(!isint[nr->type->etype]) 554 goto hard; 555 if(is64(nl->type) || is64(nr->type)) 556 goto hard; 557 558 switch(n->etype) { 559 case OADD: 560 if(smallintconst(nr)) 561 if(mpgetfix(nr->val.u.xval) == 1) { 562 a = optoas(OINC, nl->type); 563 if(nl->addable) { 564 gins(a, N, nl); 565 goto ret; 566 } 567 if(sudoaddable(a, nl, &addr)) { 568 p1 = gins(a, N, N); 569 p1->to = addr; 570 sudoclean(); 571 goto ret; 572 } 573 } 574 break; 575 576 case OSUB: 577 if(smallintconst(nr)) 578 if(mpgetfix(nr->val.u.xval) == 1) { 579 a = optoas(ODEC, nl->type); 580 if(nl->addable) { 581 gins(a, N, nl); 582 goto ret; 583 } 584 if(sudoaddable(a, nl, &addr)) { 585 p1 = gins(a, N, N); 586 p1->to = addr; 587 sudoclean(); 588 goto ret; 589 } 590 } 591 break; 592 } 593 594 switch(n->etype) { 595 case OADD: 596 case OSUB: 597 case OXOR: 598 case OAND: 599 case OOR: 600 a = optoas(n->etype, nl->type); 601 if(nl->addable) { 602 if(smallintconst(nr)) { 603 gins(a, nr, nl); 604 goto ret; 605 } 606 regalloc(&n2, nr->type, N); 607 cgen(nr, &n2); 608 gins(a, &n2, nl); 609 regfree(&n2); 610 goto ret; 611 } 612 if(nr->ullman < UINF) 613 if(sudoaddable(a, nl, &addr)) { 614 if(smallintconst(nr)) { 615 p1 = gins(a, nr, N); 616 p1->to = addr; 617 sudoclean(); 618 goto ret; 619 } 620 regalloc(&n2, nr->type, N); 621 cgen(nr, &n2); 622 p1 = gins(a, &n2, N); 623 p1->to = addr; 624 regfree(&n2); 625 sudoclean(); 626 goto ret; 627 } 628 } 629 630 hard: 631 n2.op = 0; 632 n1.op = 0; 633 if(nr->ullman >= nl->ullman || nl->addable) { 634 mgen(nr, &n2, N); 635 nr = &n2; 636 } else { 637 tempname(&n2, nr->type); 638 cgen(nr, &n2); 639 nr = &n2; 640 } 641 if(!nl->addable) { 642 igen(nl, &n1, N); 643 nl = &n1; 644 } 645 646 n3 = *n; 647 n3.left = nl; 648 n3.right = nr; 649 n3.op = n->etype; 650 651 mgen(&n3, &n4, N); 652 gmove(&n4, nl); 653 654 if(n1.op) 655 regfree(&n1); 656 mfree(&n2); 657 mfree(&n4); 658 659 ret: 660 ; 661 } 662 663 int 664 samereg(Node *a, Node *b) 665 { 666 if(a->op != OREGISTER) 667 return 0; 668 if(b->op != OREGISTER) 669 return 0; 670 if(a->val.u.reg != b->val.u.reg) 671 return 0; 672 return 1; 673 } 674 675 /* 676 * generate division. 677 * caller must set: 678 * ax = allocated AX register 679 * dx = allocated DX register 680 * generates one of: 681 * res = nl / nr 682 * res = nl % nr 683 * according to op. 684 */ 685 void 686 dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx) 687 { 688 int check; 689 Node n1, t1, t2, t3, t4, n4, nz; 690 Type *t, *t0; 691 Prog *p1, *p2; 692 693 // Have to be careful about handling 694 // most negative int divided by -1 correctly. 695 // The hardware will trap. 696 // Also the byte divide instruction needs AH, 697 // which we otherwise don't have to deal with. 698 // Easiest way to avoid for int8, int16: use int32. 699 // For int32 and int64, use explicit test. 700 // Could use int64 hw for int32. 701 t = nl->type; 702 t0 = t; 703 check = 0; 704 if(issigned[t->etype]) { 705 check = 1; 706 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1)) 707 check = 0; 708 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 709 check = 0; 710 } 711 if(t->width < 4) { 712 if(issigned[t->etype]) 713 t = types[TINT32]; 714 else 715 t = types[TUINT32]; 716 check = 0; 717 } 718 719 tempname(&t1, t); 720 tempname(&t2, t); 721 if(t0 != t) { 722 tempname(&t3, t0); 723 tempname(&t4, t0); 724 cgen(nl, &t3); 725 cgen(nr, &t4); 726 // Convert. 727 gmove(&t3, &t1); 728 gmove(&t4, &t2); 729 } else { 730 cgen(nl, &t1); 731 cgen(nr, &t2); 732 } 733 734 if(!samereg(ax, res) && !samereg(dx, res)) 735 regalloc(&n1, t, res); 736 else 737 regalloc(&n1, t, N); 738 gmove(&t2, &n1); 739 gmove(&t1, ax); 740 p2 = P; 741 if(nacl) { 742 // Native Client does not relay the divide-by-zero trap 743 // to the executing program, so we must insert a check 744 // for ourselves. 745 nodconst(&n4, t, 0); 746 gins(optoas(OCMP, t), &n1, &n4); 747 p1 = gbranch(optoas(ONE, t), T, +1); 748 if(panicdiv == N) 749 panicdiv = sysfunc("panicdivide"); 750 ginscall(panicdiv, -1); 751 patch(p1, pc); 752 } 753 if(check) { 754 nodconst(&n4, t, -1); 755 gins(optoas(OCMP, t), &n1, &n4); 756 p1 = gbranch(optoas(ONE, t), T, +1); 757 if(op == ODIV) { 758 // a / (-1) is -a. 759 gins(optoas(OMINUS, t), N, ax); 760 gmove(ax, res); 761 } else { 762 // a % (-1) is 0. 763 nodconst(&n4, t, 0); 764 gmove(&n4, res); 765 } 766 p2 = gbranch(AJMP, T, 0); 767 patch(p1, pc); 768 } 769 if(!issigned[t->etype]) { 770 nodconst(&nz, t, 0); 771 gmove(&nz, dx); 772 } else 773 gins(optoas(OEXTEND, t), N, N); 774 gins(optoas(op, t), &n1, N); 775 regfree(&n1); 776 777 if(op == ODIV) 778 gmove(ax, res); 779 else 780 gmove(dx, res); 781 if(check) 782 patch(p2, pc); 783 } 784 785 static void 786 savex(int dr, Node *x, Node *oldx, Node *res, Type *t) 787 { 788 int r; 789 790 r = reg[dr]; 791 nodreg(x, types[TINT32], dr); 792 793 // save current ax and dx if they are live 794 // and not the destination 795 memset(oldx, 0, sizeof *oldx); 796 if(r > 0 && !samereg(x, res)) { 797 tempname(oldx, types[TINT32]); 798 gmove(x, oldx); 799 } 800 801 regalloc(x, t, x); 802 } 803 804 static void 805 restx(Node *x, Node *oldx) 806 { 807 regfree(x); 808 809 if(oldx->op != 0) { 810 x->type = types[TINT32]; 811 gmove(oldx, x); 812 } 813 } 814 815 /* 816 * generate division according to op, one of: 817 * res = nl / nr 818 * res = nl % nr 819 */ 820 void 821 cgen_div(int op, Node *nl, Node *nr, Node *res) 822 { 823 Node ax, dx, oldax, olddx; 824 Type *t; 825 826 if(is64(nl->type)) 827 fatal("cgen_div %T", nl->type); 828 829 if(issigned[nl->type->etype]) 830 t = types[TINT32]; 831 else 832 t = types[TUINT32]; 833 savex(D_AX, &ax, &oldax, res, t); 834 savex(D_DX, &dx, &olddx, res, t); 835 dodiv(op, nl, nr, res, &ax, &dx); 836 restx(&dx, &olddx); 837 restx(&ax, &oldax); 838 } 839 840 /* 841 * generate shift according to op, one of: 842 * res = nl << nr 843 * res = nl >> nr 844 */ 845 void 846 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 847 { 848 Node n1, n2, nt, cx, oldcx, hi, lo; 849 int a, w; 850 Prog *p1, *p2; 851 uvlong sc; 852 853 if(nl->type->width > 4) 854 fatal("cgen_shift %T", nl->type); 855 856 w = nl->type->width * 8; 857 858 a = optoas(op, nl->type); 859 860 if(nr->op == OLITERAL) { 861 tempname(&n2, nl->type); 862 cgen(nl, &n2); 863 regalloc(&n1, nl->type, res); 864 gmove(&n2, &n1); 865 sc = mpgetfix(nr->val.u.xval); 866 if(sc >= nl->type->width*8) { 867 // large shift gets 2 shifts by width-1 868 gins(a, ncon(w-1), &n1); 869 gins(a, ncon(w-1), &n1); 870 } else 871 gins(a, nr, &n1); 872 gmove(&n1, res); 873 regfree(&n1); 874 return; 875 } 876 877 memset(&oldcx, 0, sizeof oldcx); 878 nodreg(&cx, types[TUINT32], D_CX); 879 if(reg[D_CX] > 1 && !samereg(&cx, res)) { 880 tempname(&oldcx, types[TUINT32]); 881 gmove(&cx, &oldcx); 882 } 883 884 if(nr->type->width > 4) { 885 tempname(&nt, nr->type); 886 n1 = nt; 887 } else { 888 nodreg(&n1, types[TUINT32], D_CX); 889 regalloc(&n1, nr->type, &n1); // to hold the shift type in CX 890 } 891 892 if(samereg(&cx, res)) 893 regalloc(&n2, nl->type, N); 894 else 895 regalloc(&n2, nl->type, res); 896 if(nl->ullman >= nr->ullman) { 897 cgen(nl, &n2); 898 cgen(nr, &n1); 899 } else { 900 cgen(nr, &n1); 901 cgen(nl, &n2); 902 } 903 904 // test and fix up large shifts 905 if(bounded) { 906 if(nr->type->width > 4) { 907 // delayed reg alloc 908 nodreg(&n1, types[TUINT32], D_CX); 909 regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX 910 split64(&nt, &lo, &hi); 911 gmove(&lo, &n1); 912 splitclean(); 913 } 914 } else { 915 if(nr->type->width > 4) { 916 // delayed reg alloc 917 nodreg(&n1, types[TUINT32], D_CX); 918 regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX 919 split64(&nt, &lo, &hi); 920 gmove(&lo, &n1); 921 gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0)); 922 p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1); 923 gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w)); 924 p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); 925 splitclean(); 926 patch(p2, pc); 927 } else { 928 gins(optoas(OCMP, nr->type), &n1, ncon(w)); 929 p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); 930 } 931 if(op == ORSH && issigned[nl->type->etype]) { 932 gins(a, ncon(w-1), &n2); 933 } else { 934 gmove(ncon(0), &n2); 935 } 936 patch(p1, pc); 937 } 938 gins(a, &n1, &n2); 939 940 if(oldcx.op != 0) 941 gmove(&oldcx, &cx); 942 943 gmove(&n2, res); 944 945 regfree(&n1); 946 regfree(&n2); 947 } 948 949 /* 950 * generate byte multiply: 951 * res = nl * nr 952 * there is no 2-operand byte multiply instruction so 953 * we do a full-width multiplication and truncate afterwards. 954 */ 955 void 956 cgen_bmul(int op, Node *nl, Node *nr, Node *res) 957 { 958 Node n1, n2, nt, *tmp; 959 Type *t; 960 int a; 961 962 // copy from byte to full registers 963 t = types[TUINT32]; 964 if(issigned[nl->type->etype]) 965 t = types[TINT32]; 966 967 // largest ullman on left. 968 if(nl->ullman < nr->ullman) { 969 tmp = nl; 970 nl = nr; 971 nr = tmp; 972 } 973 974 tempname(&nt, nl->type); 975 cgen(nl, &nt); 976 regalloc(&n1, t, res); 977 cgen(nr, &n1); 978 regalloc(&n2, t, N); 979 gmove(&nt, &n2); 980 a = optoas(op, t); 981 gins(a, &n2, &n1); 982 regfree(&n2); 983 gmove(&n1, res); 984 regfree(&n1); 985 } 986 987 /* 988 * generate high multiply: 989 * res = (nl*nr) >> width 990 */ 991 void 992 cgen_hmul(Node *nl, Node *nr, Node *res) 993 { 994 Type *t; 995 int a; 996 Node n1, n2, ax, dx; 997 998 t = nl->type; 999 a = optoas(OHMUL, t); 1000 // gen nl in n1. 1001 tempname(&n1, t); 1002 cgen(nl, &n1); 1003 // gen nr in n2. 1004 regalloc(&n2, t, res); 1005 cgen(nr, &n2); 1006 1007 // multiply. 1008 nodreg(&ax, t, D_AX); 1009 gmove(&n2, &ax); 1010 gins(a, &n1, N); 1011 regfree(&n2); 1012 1013 if(t->width == 1) { 1014 // byte multiply behaves differently. 1015 nodreg(&ax, t, D_AH); 1016 nodreg(&dx, t, D_DX); 1017 gmove(&ax, &dx); 1018 } 1019 nodreg(&dx, t, D_DX); 1020 gmove(&dx, res); 1021 } 1022 1023 static void cgen_float387(Node *n, Node *res); 1024 static void cgen_floatsse(Node *n, Node *res); 1025 1026 /* 1027 * generate floating-point operation. 1028 */ 1029 void 1030 cgen_float(Node *n, Node *res) 1031 { 1032 Node *nl; 1033 Node n1, n2; 1034 Prog *p1, *p2, *p3; 1035 1036 nl = n->left; 1037 switch(n->op) { 1038 case OEQ: 1039 case ONE: 1040 case OLT: 1041 case OLE: 1042 case OGE: 1043 p1 = gbranch(AJMP, T, 0); 1044 p2 = pc; 1045 gmove(nodbool(1), res); 1046 p3 = gbranch(AJMP, T, 0); 1047 patch(p1, pc); 1048 bgen(n, 1, 0, p2); 1049 gmove(nodbool(0), res); 1050 patch(p3, pc); 1051 return; 1052 1053 case OPLUS: 1054 cgen(nl, res); 1055 return; 1056 1057 case OCONV: 1058 if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) { 1059 cgen(nl, res); 1060 return; 1061 } 1062 1063 tempname(&n2, n->type); 1064 mgen(nl, &n1, res); 1065 gmove(&n1, &n2); 1066 gmove(&n2, res); 1067 mfree(&n1); 1068 return; 1069 } 1070 1071 if(use_sse) 1072 cgen_floatsse(n, res); 1073 else 1074 cgen_float387(n, res); 1075 } 1076 1077 // floating-point. 387 (not SSE2) 1078 static void 1079 cgen_float387(Node *n, Node *res) 1080 { 1081 Node f0, f1; 1082 Node *nl, *nr; 1083 1084 nl = n->left; 1085 nr = n->right; 1086 nodreg(&f0, nl->type, D_F0); 1087 nodreg(&f1, n->type, D_F0+1); 1088 if(nr != N) 1089 goto flt2; 1090 1091 // unary 1092 cgen(nl, &f0); 1093 if(n->op != OCONV && n->op != OPLUS) 1094 gins(foptoas(n->op, n->type, 0), N, N); 1095 gmove(&f0, res); 1096 return; 1097 1098 flt2: // binary 1099 if(nl->ullman >= nr->ullman) { 1100 cgen(nl, &f0); 1101 if(nr->addable) 1102 gins(foptoas(n->op, n->type, 0), nr, &f0); 1103 else { 1104 cgen(nr, &f0); 1105 gins(foptoas(n->op, n->type, Fpop), &f0, &f1); 1106 } 1107 } else { 1108 cgen(nr, &f0); 1109 if(nl->addable) 1110 gins(foptoas(n->op, n->type, Frev), nl, &f0); 1111 else { 1112 cgen(nl, &f0); 1113 gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1); 1114 } 1115 } 1116 gmove(&f0, res); 1117 return; 1118 1119 } 1120 1121 static void 1122 cgen_floatsse(Node *n, Node *res) 1123 { 1124 Node *nl, *nr, *r; 1125 Node n1, n2, nt; 1126 int a; 1127 1128 nl = n->left; 1129 nr = n->right; 1130 switch(n->op) { 1131 default: 1132 dump("cgen_floatsse", n); 1133 fatal("cgen_floatsse %O", n->op); 1134 return; 1135 1136 case OMINUS: 1137 case OCOM: 1138 nr = nodintconst(-1); 1139 convlit(&nr, n->type); 1140 a = foptoas(OMUL, nl->type, 0); 1141 goto sbop; 1142 1143 // symmetric binary 1144 case OADD: 1145 case OMUL: 1146 a = foptoas(n->op, nl->type, 0); 1147 goto sbop; 1148 1149 // asymmetric binary 1150 case OSUB: 1151 case OMOD: 1152 case ODIV: 1153 a = foptoas(n->op, nl->type, 0); 1154 goto abop; 1155 } 1156 1157 sbop: // symmetric binary 1158 if(nl->ullman < nr->ullman || nl->op == OLITERAL) { 1159 r = nl; 1160 nl = nr; 1161 nr = r; 1162 } 1163 1164 abop: // asymmetric binary 1165 if(nl->ullman >= nr->ullman) { 1166 tempname(&nt, nl->type); 1167 cgen(nl, &nt); 1168 mgen(nr, &n2, N); 1169 regalloc(&n1, nl->type, res); 1170 gmove(&nt, &n1); 1171 gins(a, &n2, &n1); 1172 gmove(&n1, res); 1173 regfree(&n1); 1174 mfree(&n2); 1175 } else { 1176 regalloc(&n2, nr->type, res); 1177 cgen(nr, &n2); 1178 regalloc(&n1, nl->type, N); 1179 cgen(nl, &n1); 1180 gins(a, &n2, &n1); 1181 regfree(&n2); 1182 gmove(&n1, res); 1183 regfree(&n1); 1184 } 1185 return; 1186 } 1187 1188 void 1189 bgen_float(Node *n, int true, int likely, Prog *to) 1190 { 1191 int et, a; 1192 Node *nl, *nr, *r; 1193 Node n1, n2, n3, tmp, t1, t2, ax; 1194 Prog *p1, *p2; 1195 1196 nl = n->left; 1197 nr = n->right; 1198 a = n->op; 1199 if(!true) { 1200 // brcom is not valid on floats when NaN is involved. 1201 p1 = gbranch(AJMP, T, 0); 1202 p2 = gbranch(AJMP, T, 0); 1203 patch(p1, pc); 1204 // No need to avoid re-genning ninit. 1205 bgen_float(n, 1, -likely, p2); 1206 patch(gbranch(AJMP, T, 0), to); 1207 patch(p2, pc); 1208 return; 1209 } 1210 1211 if(use_sse) 1212 goto sse; 1213 else 1214 goto x87; 1215 1216 x87: 1217 a = brrev(a); // because the args are stacked 1218 if(a == OGE || a == OGT) { 1219 // only < and <= work right with NaN; reverse if needed 1220 r = nr; 1221 nr = nl; 1222 nl = r; 1223 a = brrev(a); 1224 } 1225 1226 nodreg(&tmp, nr->type, D_F0); 1227 nodreg(&n2, nr->type, D_F0 + 1); 1228 nodreg(&ax, types[TUINT16], D_AX); 1229 et = simsimtype(nr->type); 1230 if(et == TFLOAT64) { 1231 if(nl->ullman > nr->ullman) { 1232 cgen(nl, &tmp); 1233 cgen(nr, &tmp); 1234 gins(AFXCHD, &tmp, &n2); 1235 } else { 1236 cgen(nr, &tmp); 1237 cgen(nl, &tmp); 1238 } 1239 gins(AFUCOMIP, &tmp, &n2); 1240 gins(AFMOVDP, &tmp, &tmp); // annoying pop but still better than STSW+SAHF 1241 } else { 1242 // TODO(rsc): The moves back and forth to memory 1243 // here are for truncating the value to 32 bits. 1244 // This handles 32-bit comparison but presumably 1245 // all the other ops have the same problem. 1246 // We need to figure out what the right general 1247 // solution is, besides telling people to use float64. 1248 tempname(&t1, types[TFLOAT32]); 1249 tempname(&t2, types[TFLOAT32]); 1250 cgen(nr, &t1); 1251 cgen(nl, &t2); 1252 gmove(&t2, &tmp); 1253 gins(AFCOMFP, &t1, &tmp); 1254 gins(AFSTSW, N, &ax); 1255 gins(ASAHF, N, N); 1256 } 1257 1258 goto ret; 1259 1260 sse: 1261 if(!nl->addable) { 1262 tempname(&n1, nl->type); 1263 cgen(nl, &n1); 1264 nl = &n1; 1265 } 1266 if(!nr->addable) { 1267 tempname(&tmp, nr->type); 1268 cgen(nr, &tmp); 1269 nr = &tmp; 1270 } 1271 regalloc(&n2, nr->type, N); 1272 gmove(nr, &n2); 1273 nr = &n2; 1274 1275 if(nl->op != OREGISTER) { 1276 regalloc(&n3, nl->type, N); 1277 gmove(nl, &n3); 1278 nl = &n3; 1279 } 1280 1281 if(a == OGE || a == OGT) { 1282 // only < and <= work right with NaN; reverse if needed 1283 r = nr; 1284 nr = nl; 1285 nl = r; 1286 a = brrev(a); 1287 } 1288 1289 gins(foptoas(OCMP, nr->type, 0), nl, nr); 1290 if(nl->op == OREGISTER) 1291 regfree(nl); 1292 regfree(nr); 1293 1294 ret: 1295 if(a == OEQ) { 1296 // neither NE nor P 1297 p1 = gbranch(AJNE, T, -likely); 1298 p2 = gbranch(AJPS, T, -likely); 1299 patch(gbranch(AJMP, T, 0), to); 1300 patch(p1, pc); 1301 patch(p2, pc); 1302 } else if(a == ONE) { 1303 // either NE or P 1304 patch(gbranch(AJNE, T, likely), to); 1305 patch(gbranch(AJPS, T, likely), to); 1306 } else 1307 patch(gbranch(optoas(a, nr->type), T, likely), to); 1308 1309 } 1310 1311 // Called after regopt and peep have run. 1312 // Expand CHECKNIL pseudo-op into actual nil pointer check. 1313 void 1314 expandchecks(Prog *firstp) 1315 { 1316 Prog *p, *p1, *p2; 1317 1318 for(p = firstp; p != P; p = p->link) { 1319 if(p->as != ACHECKNIL) 1320 continue; 1321 if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers 1322 warnl(p->lineno, "generated nil check"); 1323 // check is 1324 // CMP arg, $0 1325 // JNE 2(PC) (likely) 1326 // MOV AX, 0 1327 p1 = mal(sizeof *p1); 1328 p2 = mal(sizeof *p2); 1329 clearp(p1); 1330 clearp(p2); 1331 p1->link = p2; 1332 p2->link = p->link; 1333 p->link = p1; 1334 p1->lineno = p->lineno; 1335 p2->lineno = p->lineno; 1336 p1->pc = 9999; 1337 p2->pc = 9999; 1338 p->as = ACMPL; 1339 p->to.type = D_CONST; 1340 p->to.offset = 0; 1341 p1->as = AJNE; 1342 p1->from.type = D_CONST; 1343 p1->from.offset = 1; // likely 1344 p1->to.type = D_BRANCH; 1345 p1->to.u.branch = p2->link; 1346 // crash by write to memory address 0. 1347 // if possible, since we know arg is 0, use 0(arg), 1348 // which will be shorter to encode than plain 0. 1349 p2->as = AMOVL; 1350 p2->from.type = D_AX; 1351 if(regtyp(&p->from)) 1352 p2->to.type = p->from.type + D_INDIR; 1353 else 1354 p2->to.type = D_INDIR+D_NONE; 1355 p2->to.offset = 0; 1356 } 1357 }