github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/cmd/8g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 static Prog *appendpp(Prog*, int, int, vlong, int, vlong); 13 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); 14 15 void 16 defframe(Prog *ptxt) 17 { 18 uint32 frame, ax; 19 Prog *p; 20 vlong lo, hi; 21 NodeList *l; 22 Node *n; 23 24 // fill in argument size 25 ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); 26 27 // fill in final stack size 28 frame = rnd(stksize+maxarg, widthptr); 29 ptxt->to.offset = frame; 30 31 // insert code to zero ambiguously live variables 32 // so that the garbage collector only sees initialized values 33 // when it looks for pointers. 34 p = ptxt; 35 hi = 0; 36 lo = hi; 37 ax = 0; 38 for(l=curfn->dcl; l != nil; l = l->next) { 39 n = l->n; 40 if(!n->needzero) 41 continue; 42 if(n->class != PAUTO) 43 fatal("needzero class %d", n->class); 44 if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) 45 fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); 46 if(lo != hi && n->xoffset + n->type->width == lo - 2*widthptr) { 47 // merge with range we already have 48 lo = n->xoffset; 49 continue; 50 } 51 // zero old range 52 p = zerorange(p, frame, lo, hi, &ax); 53 54 // set new range 55 hi = n->xoffset + n->type->width; 56 lo = n->xoffset; 57 } 58 // zero final range 59 zerorange(p, frame, lo, hi, &ax); 60 } 61 62 static Prog* 63 zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax) 64 { 65 vlong cnt, i; 66 67 cnt = hi - lo; 68 if(cnt == 0) 69 return p; 70 if(*ax == 0) { 71 p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0); 72 *ax = 1; 73 } 74 if(cnt <= 4*widthreg) { 75 for(i = 0; i < cnt; i += widthreg) { 76 p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo+i); 77 } 78 } else if(!nacl && cnt <= 128*widthreg) { 79 p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0); 80 p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 1*(128-cnt/widthreg)); 81 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 82 } else { 83 p = appendpp(p, AMOVL, D_CONST, cnt/widthreg, D_CX, 0); 84 p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0); 85 p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0); 86 p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0); 87 } 88 return p; 89 } 90 91 static Prog* 92 appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset) 93 { 94 Prog *q; 95 q = mal(sizeof(*q)); 96 clearp(q); 97 q->as = as; 98 q->lineno = p->lineno; 99 q->from.type = ftype; 100 q->from.offset = foffset; 101 q->to.type = ttype; 102 q->to.offset = toffset; 103 q->link = p->link; 104 p->link = q; 105 return q; 106 } 107 108 // Sweep the prog list to mark any used nodes. 109 void 110 markautoused(Prog* p) 111 { 112 for (; p; p = p->link) { 113 if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL) 114 continue; 115 116 if (p->from.node) 117 p->from.node->used = 1; 118 119 if (p->to.node) 120 p->to.node->used = 1; 121 } 122 } 123 124 // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 125 void 126 fixautoused(Prog* p) 127 { 128 Prog **lp; 129 130 for (lp=&p; (p=*lp) != P; ) { 131 if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { 132 *lp = p->link; 133 continue; 134 } 135 if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) { 136 // Cannot remove VARDEF instruction, because - unlike TYPE handled above - 137 // VARDEFs are interspersed with other code, and a jump might be using the 138 // VARDEF as a target. Replace with a no-op instead. A later pass will remove 139 // the no-ops. 140 p->to.type = D_NONE; 141 p->to.node = N; 142 p->as = ANOP; 143 continue; 144 } 145 146 if (p->from.type == D_AUTO && p->from.node) 147 p->from.offset += p->from.node->stkdelta; 148 149 if (p->to.type == D_AUTO && p->to.node) 150 p->to.offset += p->to.node->stkdelta; 151 152 lp = &p->link; 153 } 154 } 155 156 void 157 clearfat(Node *nl) 158 { 159 uint32 w, c, q; 160 Node n1, z; 161 Prog *p; 162 163 /* clear a fat object */ 164 if(debug['g']) 165 dump("\nclearfat", nl); 166 167 w = nl->type->width; 168 // Avoid taking the address for simple enough types. 169 if(componentgen(N, nl)) 170 return; 171 172 c = w % 4; // bytes 173 q = w / 4; // quads 174 175 if(q < 4) { 176 // Write sequence of MOV 0, off(base) instead of using STOSL. 177 // The hope is that although the code will be slightly longer, 178 // the MOVs will have no dependencies and pipeline better 179 // than the unrolled STOSL loop. 180 // NOTE: Must use agen, not igen, so that optimizer sees address 181 // being taken. We are not writing on field boundaries. 182 regalloc(&n1, types[tptr], N); 183 agen(nl, &n1); 184 n1.op = OINDREG; 185 nodconst(&z, types[TUINT64], 0); 186 while(q-- > 0) { 187 n1.type = z.type; 188 gins(AMOVL, &z, &n1); 189 n1.xoffset += 4; 190 } 191 nodconst(&z, types[TUINT8], 0); 192 while(c-- > 0) { 193 n1.type = z.type; 194 gins(AMOVB, &z, &n1); 195 n1.xoffset++; 196 } 197 regfree(&n1); 198 return; 199 } 200 201 nodreg(&n1, types[tptr], D_DI); 202 agen(nl, &n1); 203 gconreg(AMOVL, 0, D_AX); 204 205 if(q > 128 || (q >= 4 && nacl)) { 206 gconreg(AMOVL, q, D_CX); 207 gins(AREP, N, N); // repeat 208 gins(ASTOSL, N, N); // STOL AL,*(DI)+ 209 } else if(q >= 4) { 210 p = gins(ADUFFZERO, N, N); 211 p->to.type = D_ADDR; 212 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 213 // 1 and 128 = magic constants: see ../../runtime/asm_386.s 214 p->to.offset = 1*(128-q); 215 } else 216 while(q > 0) { 217 gins(ASTOSL, N, N); // STOL AL,*(DI)+ 218 q--; 219 } 220 221 while(c > 0) { 222 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 223 c--; 224 } 225 } 226 227 /* 228 * generate: 229 * call f 230 * proc=-1 normal call but no return 231 * proc=0 normal call 232 * proc=1 goroutine run in new proc 233 * proc=2 defer call save away stack 234 * proc=3 normal call to C pointer (not Go func value) 235 */ 236 void 237 ginscall(Node *f, int proc) 238 { 239 Prog *p; 240 Node reg, r1, con; 241 242 if(f->type != T) 243 setmaxarg(f->type); 244 245 switch(proc) { 246 default: 247 fatal("ginscall: bad proc %d", proc); 248 break; 249 250 case 0: // normal call 251 case -1: // normal call but no return 252 if(f->op == ONAME && f->class == PFUNC) { 253 if(f == deferreturn) { 254 // Deferred calls will appear to be returning to 255 // the CALL deferreturn(SB) that we are about to emit. 256 // However, the stack trace code will show the line 257 // of the instruction byte before the return PC. 258 // To avoid that being an unrelated instruction, 259 // insert an x86 NOP that we will have the right line number. 260 // x86 NOP 0x90 is really XCHG AX, AX; use that description 261 // because the NOP pseudo-instruction will be removed by 262 // the linker. 263 nodreg(®, types[TINT], D_AX); 264 gins(AXCHGL, ®, ®); 265 } 266 p = gins(ACALL, N, f); 267 afunclit(&p->to, f); 268 if(proc == -1 || noreturn(p)) 269 gins(AUNDEF, N, N); 270 break; 271 } 272 nodreg(®, types[tptr], D_DX); 273 nodreg(&r1, types[tptr], D_BX); 274 gmove(f, ®); 275 reg.op = OINDREG; 276 gmove(®, &r1); 277 reg.op = OREGISTER; 278 gins(ACALL, ®, &r1); 279 break; 280 281 case 3: // normal call of c function pointer 282 gins(ACALL, N, f); 283 break; 284 285 case 1: // call in new proc (go) 286 case 2: // deferred call (defer) 287 nodreg(®, types[TINT32], D_CX); 288 gins(APUSHL, f, N); 289 nodconst(&con, types[TINT32], argsize(f->type)); 290 gins(APUSHL, &con, N); 291 if(proc == 1) 292 ginscall(newproc, 0); 293 else 294 ginscall(deferproc, 0); 295 gins(APOPL, N, ®); 296 gins(APOPL, N, ®); 297 if(proc == 2) { 298 nodreg(®, types[TINT64], D_AX); 299 gins(ATESTL, ®, ®); 300 p = gbranch(AJEQ, T, +1); 301 cgen_ret(N); 302 patch(p, pc); 303 } 304 break; 305 } 306 } 307 308 /* 309 * n is call to interface method. 310 * generate res = n. 311 */ 312 void 313 cgen_callinter(Node *n, Node *res, int proc) 314 { 315 Node *i, *f; 316 Node tmpi, nodi, nodo, nodr, nodsp; 317 318 i = n->left; 319 if(i->op != ODOTINTER) 320 fatal("cgen_callinter: not ODOTINTER %O", i->op); 321 322 f = i->right; // field 323 if(f->op != ONAME) 324 fatal("cgen_callinter: not ONAME %O", f->op); 325 326 i = i->left; // interface 327 328 if(!i->addable) { 329 tempname(&tmpi, i->type); 330 cgen(i, &tmpi); 331 i = &tmpi; 332 } 333 334 genlist(n->list); // assign the args 335 336 // i is now addable, prepare an indirected 337 // register to hold its address. 338 igen(i, &nodi, res); // REG = &inter 339 340 nodindreg(&nodsp, types[tptr], D_SP); 341 nodi.type = types[tptr]; 342 nodi.xoffset += widthptr; 343 cgen(&nodi, &nodsp); // 0(SP) = 4(REG) -- i.data 344 345 regalloc(&nodo, types[tptr], res); 346 nodi.type = types[tptr]; 347 nodi.xoffset -= widthptr; 348 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 349 regfree(&nodi); 350 351 regalloc(&nodr, types[tptr], &nodo); 352 if(n->left->xoffset == BADWIDTH) 353 fatal("cgen_callinter: badwidth"); 354 cgen_checknil(&nodo); 355 nodo.op = OINDREG; 356 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 357 358 if(proc == 0) { 359 // plain call: use direct c function pointer - more efficient 360 cgen(&nodo, &nodr); // REG = 20+offset(REG) -- i.tab->fun[f] 361 proc = 3; 362 } else { 363 // go/defer. generate go func value. 364 gins(ALEAL, &nodo, &nodr); // REG = &(20+offset(REG)) -- i.tab->fun[f] 365 } 366 367 nodr.type = n->left->type; 368 ginscall(&nodr, proc); 369 370 regfree(&nodr); 371 regfree(&nodo); 372 } 373 374 /* 375 * generate function call; 376 * proc=0 normal call 377 * proc=1 goroutine run in new proc 378 * proc=2 defer call save away stack 379 */ 380 void 381 cgen_call(Node *n, int proc) 382 { 383 Type *t; 384 Node nod, afun; 385 386 if(n == N) 387 return; 388 389 if(n->left->ullman >= UINF) { 390 // if name involves a fn call 391 // precompute the address of the fn 392 tempname(&afun, types[tptr]); 393 cgen(n->left, &afun); 394 } 395 396 genlist(n->list); // assign the args 397 t = n->left->type; 398 399 // call tempname pointer 400 if(n->left->ullman >= UINF) { 401 regalloc(&nod, types[tptr], N); 402 cgen_as(&nod, &afun); 403 nod.type = t; 404 ginscall(&nod, proc); 405 regfree(&nod); 406 return; 407 } 408 409 // call pointer 410 if(n->left->op != ONAME || n->left->class != PFUNC) { 411 regalloc(&nod, types[tptr], N); 412 cgen_as(&nod, n->left); 413 nod.type = t; 414 ginscall(&nod, proc); 415 regfree(&nod); 416 return; 417 } 418 419 // call direct 420 n->left->method = 1; 421 ginscall(n->left, proc); 422 } 423 424 /* 425 * call to n has already been generated. 426 * generate: 427 * res = return value from call. 428 */ 429 void 430 cgen_callret(Node *n, Node *res) 431 { 432 Node nod; 433 Type *fp, *t; 434 Iter flist; 435 436 t = n->left->type; 437 if(t->etype == TPTR32 || t->etype == TPTR64) 438 t = t->type; 439 440 fp = structfirst(&flist, getoutarg(t)); 441 if(fp == T) 442 fatal("cgen_callret: nil"); 443 444 memset(&nod, 0, sizeof(nod)); 445 nod.op = OINDREG; 446 nod.val.u.reg = D_SP; 447 nod.addable = 1; 448 449 nod.xoffset = fp->width; 450 nod.type = fp->type; 451 cgen_as(res, &nod); 452 } 453 454 /* 455 * call to n has already been generated. 456 * generate: 457 * res = &return value from call. 458 */ 459 void 460 cgen_aret(Node *n, Node *res) 461 { 462 Node nod1, nod2; 463 Type *fp, *t; 464 Iter flist; 465 466 t = n->left->type; 467 if(isptr[t->etype]) 468 t = t->type; 469 470 fp = structfirst(&flist, getoutarg(t)); 471 if(fp == T) 472 fatal("cgen_aret: nil"); 473 474 memset(&nod1, 0, sizeof(nod1)); 475 nod1.op = OINDREG; 476 nod1.val.u.reg = D_SP; 477 nod1.addable = 1; 478 479 nod1.xoffset = fp->width; 480 nod1.type = fp->type; 481 482 if(res->op != OREGISTER) { 483 regalloc(&nod2, types[tptr], res); 484 gins(ALEAL, &nod1, &nod2); 485 gins(AMOVL, &nod2, res); 486 regfree(&nod2); 487 } else 488 gins(ALEAL, &nod1, res); 489 } 490 491 /* 492 * generate return. 493 * n->left is assignments to return values. 494 */ 495 void 496 cgen_ret(Node *n) 497 { 498 Prog *p; 499 500 if(n != N) 501 genlist(n->list); // copy out args 502 if(hasdefer) 503 ginscall(deferreturn, 0); 504 genlist(curfn->exit); 505 p = gins(ARET, N, N); 506 if(n != N && n->op == ORETJMP) { 507 p->to.type = D_EXTERN; 508 p->to.sym = linksym(n->left->sym); 509 } 510 } 511 512 /* 513 * generate += *= etc. 514 */ 515 void 516 cgen_asop(Node *n) 517 { 518 Node n1, n2, n3, n4; 519 Node *nl, *nr; 520 Prog *p1; 521 Addr addr; 522 int a; 523 524 nl = n->left; 525 nr = n->right; 526 527 if(nr->ullman >= UINF && nl->ullman >= UINF) { 528 tempname(&n1, nr->type); 529 cgen(nr, &n1); 530 n2 = *n; 531 n2.right = &n1; 532 cgen_asop(&n2); 533 goto ret; 534 } 535 536 if(!isint[nl->type->etype]) 537 goto hard; 538 if(!isint[nr->type->etype]) 539 goto hard; 540 if(is64(nl->type) || is64(nr->type)) 541 goto hard; 542 543 switch(n->etype) { 544 case OADD: 545 if(smallintconst(nr)) 546 if(mpgetfix(nr->val.u.xval) == 1) { 547 a = optoas(OINC, nl->type); 548 if(nl->addable) { 549 gins(a, N, nl); 550 goto ret; 551 } 552 if(sudoaddable(a, nl, &addr)) { 553 p1 = gins(a, N, N); 554 p1->to = addr; 555 sudoclean(); 556 goto ret; 557 } 558 } 559 break; 560 561 case OSUB: 562 if(smallintconst(nr)) 563 if(mpgetfix(nr->val.u.xval) == 1) { 564 a = optoas(ODEC, nl->type); 565 if(nl->addable) { 566 gins(a, N, nl); 567 goto ret; 568 } 569 if(sudoaddable(a, nl, &addr)) { 570 p1 = gins(a, N, N); 571 p1->to = addr; 572 sudoclean(); 573 goto ret; 574 } 575 } 576 break; 577 } 578 579 switch(n->etype) { 580 case OADD: 581 case OSUB: 582 case OXOR: 583 case OAND: 584 case OOR: 585 a = optoas(n->etype, nl->type); 586 if(nl->addable) { 587 if(smallintconst(nr)) { 588 gins(a, nr, nl); 589 goto ret; 590 } 591 regalloc(&n2, nr->type, N); 592 cgen(nr, &n2); 593 gins(a, &n2, nl); 594 regfree(&n2); 595 goto ret; 596 } 597 if(nr->ullman < UINF) 598 if(sudoaddable(a, nl, &addr)) { 599 if(smallintconst(nr)) { 600 p1 = gins(a, nr, N); 601 p1->to = addr; 602 sudoclean(); 603 goto ret; 604 } 605 regalloc(&n2, nr->type, N); 606 cgen(nr, &n2); 607 p1 = gins(a, &n2, N); 608 p1->to = addr; 609 regfree(&n2); 610 sudoclean(); 611 goto ret; 612 } 613 } 614 615 hard: 616 n2.op = 0; 617 n1.op = 0; 618 if(nr->ullman >= nl->ullman || nl->addable) { 619 mgen(nr, &n2, N); 620 nr = &n2; 621 } else { 622 tempname(&n2, nr->type); 623 cgen(nr, &n2); 624 nr = &n2; 625 } 626 if(!nl->addable) { 627 igen(nl, &n1, N); 628 nl = &n1; 629 } 630 631 n3 = *n; 632 n3.left = nl; 633 n3.right = nr; 634 n3.op = n->etype; 635 636 mgen(&n3, &n4, N); 637 gmove(&n4, nl); 638 639 if(n1.op) 640 regfree(&n1); 641 mfree(&n2); 642 mfree(&n4); 643 644 ret: 645 ; 646 } 647 648 int 649 samereg(Node *a, Node *b) 650 { 651 if(a->op != OREGISTER) 652 return 0; 653 if(b->op != OREGISTER) 654 return 0; 655 if(a->val.u.reg != b->val.u.reg) 656 return 0; 657 return 1; 658 } 659 660 /* 661 * generate division. 662 * caller must set: 663 * ax = allocated AX register 664 * dx = allocated DX register 665 * generates one of: 666 * res = nl / nr 667 * res = nl % nr 668 * according to op. 669 */ 670 void 671 dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx) 672 { 673 int check; 674 Node n1, t1, t2, t3, t4, n4, nz; 675 Type *t, *t0; 676 Prog *p1, *p2; 677 678 // Have to be careful about handling 679 // most negative int divided by -1 correctly. 680 // The hardware will trap. 681 // Also the byte divide instruction needs AH, 682 // which we otherwise don't have to deal with. 683 // Easiest way to avoid for int8, int16: use int32. 684 // For int32 and int64, use explicit test. 685 // Could use int64 hw for int32. 686 t = nl->type; 687 t0 = t; 688 check = 0; 689 if(issigned[t->etype]) { 690 check = 1; 691 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1)) 692 check = 0; 693 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 694 check = 0; 695 } 696 if(t->width < 4) { 697 if(issigned[t->etype]) 698 t = types[TINT32]; 699 else 700 t = types[TUINT32]; 701 check = 0; 702 } 703 704 tempname(&t1, t); 705 tempname(&t2, t); 706 if(t0 != t) { 707 tempname(&t3, t0); 708 tempname(&t4, t0); 709 cgen(nl, &t3); 710 cgen(nr, &t4); 711 // Convert. 712 gmove(&t3, &t1); 713 gmove(&t4, &t2); 714 } else { 715 cgen(nl, &t1); 716 cgen(nr, &t2); 717 } 718 719 if(!samereg(ax, res) && !samereg(dx, res)) 720 regalloc(&n1, t, res); 721 else 722 regalloc(&n1, t, N); 723 gmove(&t2, &n1); 724 gmove(&t1, ax); 725 p2 = P; 726 if(nacl) { 727 // Native Client does not relay the divide-by-zero trap 728 // to the executing program, so we must insert a check 729 // for ourselves. 730 nodconst(&n4, t, 0); 731 gins(optoas(OCMP, t), &n1, &n4); 732 p1 = gbranch(optoas(ONE, t), T, +1); 733 if(panicdiv == N) 734 panicdiv = sysfunc("panicdivide"); 735 ginscall(panicdiv, -1); 736 patch(p1, pc); 737 } 738 if(check) { 739 nodconst(&n4, t, -1); 740 gins(optoas(OCMP, t), &n1, &n4); 741 p1 = gbranch(optoas(ONE, t), T, +1); 742 if(op == ODIV) { 743 // a / (-1) is -a. 744 gins(optoas(OMINUS, t), N, ax); 745 gmove(ax, res); 746 } else { 747 // a % (-1) is 0. 748 nodconst(&n4, t, 0); 749 gmove(&n4, res); 750 } 751 p2 = gbranch(AJMP, T, 0); 752 patch(p1, pc); 753 } 754 if(!issigned[t->etype]) { 755 nodconst(&nz, t, 0); 756 gmove(&nz, dx); 757 } else 758 gins(optoas(OEXTEND, t), N, N); 759 gins(optoas(op, t), &n1, N); 760 regfree(&n1); 761 762 if(op == ODIV) 763 gmove(ax, res); 764 else 765 gmove(dx, res); 766 if(check) 767 patch(p2, pc); 768 } 769 770 static void 771 savex(int dr, Node *x, Node *oldx, Node *res, Type *t) 772 { 773 int r; 774 775 r = reg[dr]; 776 nodreg(x, types[TINT32], dr); 777 778 // save current ax and dx if they are live 779 // and not the destination 780 memset(oldx, 0, sizeof *oldx); 781 if(r > 0 && !samereg(x, res)) { 782 tempname(oldx, types[TINT32]); 783 gmove(x, oldx); 784 } 785 786 regalloc(x, t, x); 787 } 788 789 static void 790 restx(Node *x, Node *oldx) 791 { 792 regfree(x); 793 794 if(oldx->op != 0) { 795 x->type = types[TINT32]; 796 gmove(oldx, x); 797 } 798 } 799 800 /* 801 * generate division according to op, one of: 802 * res = nl / nr 803 * res = nl % nr 804 */ 805 void 806 cgen_div(int op, Node *nl, Node *nr, Node *res) 807 { 808 Node ax, dx, oldax, olddx; 809 Type *t; 810 811 if(is64(nl->type)) 812 fatal("cgen_div %T", nl->type); 813 814 if(issigned[nl->type->etype]) 815 t = types[TINT32]; 816 else 817 t = types[TUINT32]; 818 savex(D_AX, &ax, &oldax, res, t); 819 savex(D_DX, &dx, &olddx, res, t); 820 dodiv(op, nl, nr, res, &ax, &dx); 821 restx(&dx, &olddx); 822 restx(&ax, &oldax); 823 } 824 825 /* 826 * generate shift according to op, one of: 827 * res = nl << nr 828 * res = nl >> nr 829 */ 830 void 831 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 832 { 833 Node n1, n2, nt, cx, oldcx, hi, lo; 834 int a, w; 835 Prog *p1, *p2; 836 uvlong sc; 837 838 if(nl->type->width > 4) 839 fatal("cgen_shift %T", nl->type); 840 841 w = nl->type->width * 8; 842 843 a = optoas(op, nl->type); 844 845 if(nr->op == OLITERAL) { 846 tempname(&n2, nl->type); 847 cgen(nl, &n2); 848 regalloc(&n1, nl->type, res); 849 gmove(&n2, &n1); 850 sc = mpgetfix(nr->val.u.xval); 851 if(sc >= nl->type->width*8) { 852 // large shift gets 2 shifts by width-1 853 gins(a, ncon(w-1), &n1); 854 gins(a, ncon(w-1), &n1); 855 } else 856 gins(a, nr, &n1); 857 gmove(&n1, res); 858 regfree(&n1); 859 return; 860 } 861 862 memset(&oldcx, 0, sizeof oldcx); 863 nodreg(&cx, types[TUINT32], D_CX); 864 if(reg[D_CX] > 1 && !samereg(&cx, res)) { 865 tempname(&oldcx, types[TUINT32]); 866 gmove(&cx, &oldcx); 867 } 868 869 if(nr->type->width > 4) { 870 tempname(&nt, nr->type); 871 n1 = nt; 872 } else { 873 nodreg(&n1, types[TUINT32], D_CX); 874 regalloc(&n1, nr->type, &n1); // to hold the shift type in CX 875 } 876 877 if(samereg(&cx, res)) 878 regalloc(&n2, nl->type, N); 879 else 880 regalloc(&n2, nl->type, res); 881 if(nl->ullman >= nr->ullman) { 882 cgen(nl, &n2); 883 cgen(nr, &n1); 884 } else { 885 cgen(nr, &n1); 886 cgen(nl, &n2); 887 } 888 889 // test and fix up large shifts 890 if(bounded) { 891 if(nr->type->width > 4) { 892 // delayed reg alloc 893 nodreg(&n1, types[TUINT32], D_CX); 894 regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX 895 split64(&nt, &lo, &hi); 896 gmove(&lo, &n1); 897 splitclean(); 898 } 899 } else { 900 if(nr->type->width > 4) { 901 // delayed reg alloc 902 nodreg(&n1, types[TUINT32], D_CX); 903 regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX 904 split64(&nt, &lo, &hi); 905 gmove(&lo, &n1); 906 gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0)); 907 p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1); 908 gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w)); 909 p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); 910 splitclean(); 911 patch(p2, pc); 912 } else { 913 gins(optoas(OCMP, nr->type), &n1, ncon(w)); 914 p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); 915 } 916 if(op == ORSH && issigned[nl->type->etype]) { 917 gins(a, ncon(w-1), &n2); 918 } else { 919 gmove(ncon(0), &n2); 920 } 921 patch(p1, pc); 922 } 923 gins(a, &n1, &n2); 924 925 if(oldcx.op != 0) 926 gmove(&oldcx, &cx); 927 928 gmove(&n2, res); 929 930 regfree(&n1); 931 regfree(&n2); 932 } 933 934 /* 935 * generate byte multiply: 936 * res = nl * nr 937 * there is no 2-operand byte multiply instruction so 938 * we do a full-width multiplication and truncate afterwards. 939 */ 940 void 941 cgen_bmul(int op, Node *nl, Node *nr, Node *res) 942 { 943 Node n1, n2, nt, *tmp; 944 Type *t; 945 int a; 946 947 // copy from byte to full registers 948 t = types[TUINT32]; 949 if(issigned[nl->type->etype]) 950 t = types[TINT32]; 951 952 // largest ullman on left. 953 if(nl->ullman < nr->ullman) { 954 tmp = nl; 955 nl = nr; 956 nr = tmp; 957 } 958 959 tempname(&nt, nl->type); 960 cgen(nl, &nt); 961 regalloc(&n1, t, res); 962 cgen(nr, &n1); 963 regalloc(&n2, t, N); 964 gmove(&nt, &n2); 965 a = optoas(op, t); 966 gins(a, &n2, &n1); 967 regfree(&n2); 968 gmove(&n1, res); 969 regfree(&n1); 970 } 971 972 /* 973 * generate high multiply: 974 * res = (nl*nr) >> width 975 */ 976 void 977 cgen_hmul(Node *nl, Node *nr, Node *res) 978 { 979 Type *t; 980 int a; 981 Node n1, n2, ax, dx; 982 983 t = nl->type; 984 a = optoas(OHMUL, t); 985 // gen nl in n1. 986 tempname(&n1, t); 987 cgen(nl, &n1); 988 // gen nr in n2. 989 regalloc(&n2, t, res); 990 cgen(nr, &n2); 991 992 // multiply. 993 nodreg(&ax, t, D_AX); 994 gmove(&n2, &ax); 995 gins(a, &n1, N); 996 regfree(&n2); 997 998 if(t->width == 1) { 999 // byte multiply behaves differently. 1000 nodreg(&ax, t, D_AH); 1001 nodreg(&dx, t, D_DX); 1002 gmove(&ax, &dx); 1003 } 1004 nodreg(&dx, t, D_DX); 1005 gmove(&dx, res); 1006 } 1007 1008 static void cgen_float387(Node *n, Node *res); 1009 static void cgen_floatsse(Node *n, Node *res); 1010 1011 /* 1012 * generate floating-point operation. 1013 */ 1014 void 1015 cgen_float(Node *n, Node *res) 1016 { 1017 Node *nl; 1018 Node n1, n2; 1019 Prog *p1, *p2, *p3; 1020 1021 nl = n->left; 1022 switch(n->op) { 1023 case OEQ: 1024 case ONE: 1025 case OLT: 1026 case OLE: 1027 case OGE: 1028 p1 = gbranch(AJMP, T, 0); 1029 p2 = pc; 1030 gmove(nodbool(1), res); 1031 p3 = gbranch(AJMP, T, 0); 1032 patch(p1, pc); 1033 bgen(n, 1, 0, p2); 1034 gmove(nodbool(0), res); 1035 patch(p3, pc); 1036 return; 1037 1038 case OPLUS: 1039 cgen(nl, res); 1040 return; 1041 1042 case OCONV: 1043 if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) { 1044 cgen(nl, res); 1045 return; 1046 } 1047 1048 tempname(&n2, n->type); 1049 mgen(nl, &n1, res); 1050 gmove(&n1, &n2); 1051 gmove(&n2, res); 1052 mfree(&n1); 1053 return; 1054 } 1055 1056 if(use_sse) 1057 cgen_floatsse(n, res); 1058 else 1059 cgen_float387(n, res); 1060 } 1061 1062 // floating-point. 387 (not SSE2) 1063 static void 1064 cgen_float387(Node *n, Node *res) 1065 { 1066 Node f0, f1; 1067 Node *nl, *nr; 1068 1069 nl = n->left; 1070 nr = n->right; 1071 nodreg(&f0, nl->type, D_F0); 1072 nodreg(&f1, n->type, D_F0+1); 1073 if(nr != N) 1074 goto flt2; 1075 1076 // unary 1077 cgen(nl, &f0); 1078 if(n->op != OCONV && n->op != OPLUS) 1079 gins(foptoas(n->op, n->type, 0), N, N); 1080 gmove(&f0, res); 1081 return; 1082 1083 flt2: // binary 1084 if(nl->ullman >= nr->ullman) { 1085 cgen(nl, &f0); 1086 if(nr->addable) 1087 gins(foptoas(n->op, n->type, 0), nr, &f0); 1088 else { 1089 cgen(nr, &f0); 1090 gins(foptoas(n->op, n->type, Fpop), &f0, &f1); 1091 } 1092 } else { 1093 cgen(nr, &f0); 1094 if(nl->addable) 1095 gins(foptoas(n->op, n->type, Frev), nl, &f0); 1096 else { 1097 cgen(nl, &f0); 1098 gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1); 1099 } 1100 } 1101 gmove(&f0, res); 1102 return; 1103 1104 } 1105 1106 static void 1107 cgen_floatsse(Node *n, Node *res) 1108 { 1109 Node *nl, *nr, *r; 1110 Node n1, n2, nt; 1111 int a; 1112 1113 nl = n->left; 1114 nr = n->right; 1115 switch(n->op) { 1116 default: 1117 dump("cgen_floatsse", n); 1118 fatal("cgen_floatsse %O", n->op); 1119 return; 1120 1121 case OMINUS: 1122 case OCOM: 1123 nr = nodintconst(-1); 1124 convlit(&nr, n->type); 1125 a = foptoas(OMUL, nl->type, 0); 1126 goto sbop; 1127 1128 // symmetric binary 1129 case OADD: 1130 case OMUL: 1131 a = foptoas(n->op, nl->type, 0); 1132 goto sbop; 1133 1134 // asymmetric binary 1135 case OSUB: 1136 case OMOD: 1137 case ODIV: 1138 a = foptoas(n->op, nl->type, 0); 1139 goto abop; 1140 } 1141 1142 sbop: // symmetric binary 1143 if(nl->ullman < nr->ullman || nl->op == OLITERAL) { 1144 r = nl; 1145 nl = nr; 1146 nr = r; 1147 } 1148 1149 abop: // asymmetric binary 1150 if(nl->ullman >= nr->ullman) { 1151 tempname(&nt, nl->type); 1152 cgen(nl, &nt); 1153 mgen(nr, &n2, N); 1154 regalloc(&n1, nl->type, res); 1155 gmove(&nt, &n1); 1156 gins(a, &n2, &n1); 1157 gmove(&n1, res); 1158 regfree(&n1); 1159 mfree(&n2); 1160 } else { 1161 regalloc(&n2, nr->type, res); 1162 cgen(nr, &n2); 1163 regalloc(&n1, nl->type, N); 1164 cgen(nl, &n1); 1165 gins(a, &n2, &n1); 1166 regfree(&n2); 1167 gmove(&n1, res); 1168 regfree(&n1); 1169 } 1170 return; 1171 } 1172 1173 void 1174 bgen_float(Node *n, int true, int likely, Prog *to) 1175 { 1176 int et, a; 1177 Node *nl, *nr, *r; 1178 Node n1, n2, n3, tmp, t1, t2, ax; 1179 Prog *p1, *p2; 1180 1181 nl = n->left; 1182 nr = n->right; 1183 a = n->op; 1184 if(!true) { 1185 // brcom is not valid on floats when NaN is involved. 1186 p1 = gbranch(AJMP, T, 0); 1187 p2 = gbranch(AJMP, T, 0); 1188 patch(p1, pc); 1189 // No need to avoid re-genning ninit. 1190 bgen_float(n, 1, -likely, p2); 1191 patch(gbranch(AJMP, T, 0), to); 1192 patch(p2, pc); 1193 return; 1194 } 1195 1196 if(use_sse) 1197 goto sse; 1198 else 1199 goto x87; 1200 1201 x87: 1202 a = brrev(a); // because the args are stacked 1203 if(a == OGE || a == OGT) { 1204 // only < and <= work right with NaN; reverse if needed 1205 r = nr; 1206 nr = nl; 1207 nl = r; 1208 a = brrev(a); 1209 } 1210 1211 nodreg(&tmp, nr->type, D_F0); 1212 nodreg(&n2, nr->type, D_F0 + 1); 1213 nodreg(&ax, types[TUINT16], D_AX); 1214 et = simsimtype(nr->type); 1215 if(et == TFLOAT64) { 1216 if(nl->ullman > nr->ullman) { 1217 cgen(nl, &tmp); 1218 cgen(nr, &tmp); 1219 gins(AFXCHD, &tmp, &n2); 1220 } else { 1221 cgen(nr, &tmp); 1222 cgen(nl, &tmp); 1223 } 1224 gins(AFUCOMIP, &tmp, &n2); 1225 gins(AFMOVDP, &tmp, &tmp); // annoying pop but still better than STSW+SAHF 1226 } else { 1227 // TODO(rsc): The moves back and forth to memory 1228 // here are for truncating the value to 32 bits. 1229 // This handles 32-bit comparison but presumably 1230 // all the other ops have the same problem. 1231 // We need to figure out what the right general 1232 // solution is, besides telling people to use float64. 1233 tempname(&t1, types[TFLOAT32]); 1234 tempname(&t2, types[TFLOAT32]); 1235 cgen(nr, &t1); 1236 cgen(nl, &t2); 1237 gmove(&t2, &tmp); 1238 gins(AFCOMFP, &t1, &tmp); 1239 gins(AFSTSW, N, &ax); 1240 gins(ASAHF, N, N); 1241 } 1242 1243 goto ret; 1244 1245 sse: 1246 if(!nl->addable) { 1247 tempname(&n1, nl->type); 1248 cgen(nl, &n1); 1249 nl = &n1; 1250 } 1251 if(!nr->addable) { 1252 tempname(&tmp, nr->type); 1253 cgen(nr, &tmp); 1254 nr = &tmp; 1255 } 1256 regalloc(&n2, nr->type, N); 1257 gmove(nr, &n2); 1258 nr = &n2; 1259 1260 if(nl->op != OREGISTER) { 1261 regalloc(&n3, nl->type, N); 1262 gmove(nl, &n3); 1263 nl = &n3; 1264 } 1265 1266 if(a == OGE || a == OGT) { 1267 // only < and <= work right with NaN; reverse if needed 1268 r = nr; 1269 nr = nl; 1270 nl = r; 1271 a = brrev(a); 1272 } 1273 1274 gins(foptoas(OCMP, nr->type, 0), nl, nr); 1275 if(nl->op == OREGISTER) 1276 regfree(nl); 1277 regfree(nr); 1278 1279 ret: 1280 if(a == OEQ) { 1281 // neither NE nor P 1282 p1 = gbranch(AJNE, T, -likely); 1283 p2 = gbranch(AJPS, T, -likely); 1284 patch(gbranch(AJMP, T, 0), to); 1285 patch(p1, pc); 1286 patch(p2, pc); 1287 } else if(a == ONE) { 1288 // either NE or P 1289 patch(gbranch(AJNE, T, likely), to); 1290 patch(gbranch(AJPS, T, likely), to); 1291 } else 1292 patch(gbranch(optoas(a, nr->type), T, likely), to); 1293 1294 } 1295 1296 // Called after regopt and peep have run. 1297 // Expand CHECKNIL pseudo-op into actual nil pointer check. 1298 void 1299 expandchecks(Prog *firstp) 1300 { 1301 Prog *p, *p1, *p2; 1302 1303 for(p = firstp; p != P; p = p->link) { 1304 if(p->as != ACHECKNIL) 1305 continue; 1306 if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers 1307 warnl(p->lineno, "generated nil check"); 1308 // check is 1309 // CMP arg, $0 1310 // JNE 2(PC) (likely) 1311 // MOV AX, 0 1312 p1 = mal(sizeof *p1); 1313 p2 = mal(sizeof *p2); 1314 clearp(p1); 1315 clearp(p2); 1316 p1->link = p2; 1317 p2->link = p->link; 1318 p->link = p1; 1319 p1->lineno = p->lineno; 1320 p2->lineno = p->lineno; 1321 p1->pc = 9999; 1322 p2->pc = 9999; 1323 p->as = ACMPL; 1324 p->to.type = D_CONST; 1325 p->to.offset = 0; 1326 p1->as = AJNE; 1327 p1->from.type = D_CONST; 1328 p1->from.offset = 1; // likely 1329 p1->to.type = D_BRANCH; 1330 p1->to.u.branch = p2->link; 1331 // crash by write to memory address 0. 1332 // if possible, since we know arg is 0, use 0(arg), 1333 // which will be shorter to encode than plain 0. 1334 p2->as = AMOVL; 1335 p2->from.type = D_AX; 1336 if(regtyp(&p->from)) 1337 p2->to.type = p->from.type + D_INDIR; 1338 else 1339 p2->to.type = D_INDIR+D_NONE; 1340 p2->to.offset = 0; 1341 } 1342 }