github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/8g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 static Prog *appendpp(Prog*, int, int, vlong, int, vlong); 13 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); 14 15 void 16 defframe(Prog *ptxt) 17 { 18 uint32 frame, ax; 19 Prog *p; 20 vlong lo, hi; 21 NodeList *l; 22 Node *n; 23 24 // fill in argument size 25 ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); 26 27 // fill in final stack size 28 frame = rnd(stksize+maxarg, widthptr); 29 ptxt->to.offset = frame; 30 31 // insert code to zero ambiguously live variables 32 // so that the garbage collector only sees initialized values 33 // when it looks for pointers. 34 p = ptxt; 35 hi = 0; 36 lo = hi; 37 ax = 0; 38 for(l=curfn->dcl; l != nil; l = l->next) { 39 n = l->n; 40 if(!n->needzero) 41 continue; 42 if(n->class != PAUTO) 43 fatal("needzero class %d", n->class); 44 if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) 45 fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); 46 if(lo != hi && n->xoffset + n->type->width == lo - 2*widthptr) { 47 // merge with range we already have 48 lo = n->xoffset; 49 continue; 50 } 51 // zero old range 52 p = zerorange(p, frame, lo, hi, &ax); 53 54 // set new range 55 hi = n->xoffset + n->type->width; 56 lo = n->xoffset; 57 } 58 // zero final range 59 zerorange(p, frame, lo, hi, &ax); 60 } 61 62 static Prog* 63 zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax) 64 { 65 vlong cnt, i; 66 67 cnt = hi - lo; 68 if(cnt == 0) 69 return p; 70 if(*ax == 0) { 71 p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0); 72 *ax = 1; 73 } 74 if(cnt <= 4*widthreg) { 75 for(i = 0; i < cnt; i += widthreg) { 76 p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo+i); 77 } 78 } else if(!nacl && cnt <= 128*widthreg) { 79 p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0); 80 p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 1*(128-cnt/widthreg)); 81 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 82 } else { 83 p = appendpp(p, AMOVL, D_CONST, cnt/widthreg, D_CX, 0); 84 p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0); 85 p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0); 86 p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0); 87 } 88 return p; 89 } 90 91 static Prog* 92 appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset) 93 { 94 Prog *q; 95 q = mal(sizeof(*q)); 96 clearp(q); 97 q->as = as; 98 q->lineno = p->lineno; 99 q->from.type = ftype; 100 q->from.offset = foffset; 101 q->to.type = ttype; 102 q->to.offset = toffset; 103 q->link = p->link; 104 p->link = q; 105 return q; 106 } 107 108 // Sweep the prog list to mark any used nodes. 109 void 110 markautoused(Prog* p) 111 { 112 for (; p; p = p->link) { 113 if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL) 114 continue; 115 116 if (p->from.node) 117 p->from.node->used = 1; 118 119 if (p->to.node) 120 p->to.node->used = 1; 121 } 122 } 123 124 // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 125 void 126 fixautoused(Prog* p) 127 { 128 Prog **lp; 129 130 for (lp=&p; (p=*lp) != P; ) { 131 if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { 132 *lp = p->link; 133 continue; 134 } 135 if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) { 136 // Cannot remove VARDEF instruction, because - unlike TYPE handled above - 137 // VARDEFs are interspersed with other code, and a jump might be using the 138 // VARDEF as a target. Replace with a no-op instead. A later pass will remove 139 // the no-ops. 140 p->to.type = D_NONE; 141 p->to.node = N; 142 p->as = ANOP; 143 continue; 144 } 145 146 if (p->from.type == D_AUTO && p->from.node) 147 p->from.offset += p->from.node->stkdelta; 148 149 if (p->to.type == D_AUTO && p->to.node) 150 p->to.offset += p->to.node->stkdelta; 151 152 lp = &p->link; 153 } 154 } 155 156 void 157 clearfat(Node *nl) 158 { 159 uint32 w, c, q; 160 Node n1; 161 Prog *p; 162 163 /* clear a fat object */ 164 if(debug['g']) 165 dump("\nclearfat", nl); 166 167 w = nl->type->width; 168 // Avoid taking the address for simple enough types. 169 if(componentgen(N, nl)) 170 return; 171 172 c = w % 4; // bytes 173 q = w / 4; // quads 174 175 nodreg(&n1, types[tptr], D_DI); 176 agen(nl, &n1); 177 gconreg(AMOVL, 0, D_AX); 178 179 if(q > 128 || (q >= 4 && nacl)) { 180 gconreg(AMOVL, q, D_CX); 181 gins(AREP, N, N); // repeat 182 gins(ASTOSL, N, N); // STOL AL,*(DI)+ 183 } else if(q >= 4) { 184 p = gins(ADUFFZERO, N, N); 185 p->to.type = D_ADDR; 186 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 187 // 1 and 128 = magic constants: see ../../pkg/runtime/asm_386.s 188 p->to.offset = 1*(128-q); 189 } else 190 while(q > 0) { 191 gins(ASTOSL, N, N); // STOL AL,*(DI)+ 192 q--; 193 } 194 195 while(c > 0) { 196 gins(ASTOSB, N, N); // STOB AL,*(DI)+ 197 c--; 198 } 199 } 200 201 /* 202 * generate: 203 * call f 204 * proc=-1 normal call but no return 205 * proc=0 normal call 206 * proc=1 goroutine run in new proc 207 * proc=2 defer call save away stack 208 * proc=3 normal call to C pointer (not Go func value) 209 */ 210 void 211 ginscall(Node *f, int proc) 212 { 213 int32 arg; 214 Prog *p; 215 Node reg, r1, con; 216 217 if(f->type != T) 218 setmaxarg(f->type); 219 220 arg = -1; 221 // Most functions have a fixed-size argument block, so traceback uses that during unwind. 222 // Not all, though: there are some variadic functions in package runtime, 223 // and for those we emit call-specific metadata recorded by caller. 224 // Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub), 225 // so we do this for all indirect calls as well. 226 if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) { 227 arg = f->type->argwid; 228 if(proc == 1 || proc == 2) 229 arg += 2*widthptr; 230 } 231 232 if(arg != -1) 233 gargsize(arg); 234 235 switch(proc) { 236 default: 237 fatal("ginscall: bad proc %d", proc); 238 break; 239 240 case 0: // normal call 241 case -1: // normal call but no return 242 if(f->op == ONAME && f->class == PFUNC) { 243 if(f == deferreturn) { 244 // Deferred calls will appear to be returning to 245 // the CALL deferreturn(SB) that we are about to emit. 246 // However, the stack trace code will show the line 247 // of the instruction byte before the return PC. 248 // To avoid that being an unrelated instruction, 249 // insert an x86 NOP that we will have the right line number. 250 // x86 NOP 0x90 is really XCHG AX, AX; use that description 251 // because the NOP pseudo-instruction will be removed by 252 // the linker. 253 nodreg(®, types[TINT], D_AX); 254 gins(AXCHGL, ®, ®); 255 } 256 p = gins(ACALL, N, f); 257 afunclit(&p->to, f); 258 if(proc == -1 || noreturn(p)) 259 gins(AUNDEF, N, N); 260 break; 261 } 262 nodreg(®, types[tptr], D_DX); 263 nodreg(&r1, types[tptr], D_BX); 264 gmove(f, ®); 265 reg.op = OINDREG; 266 gmove(®, &r1); 267 reg.op = OREGISTER; 268 gins(ACALL, ®, &r1); 269 break; 270 271 case 3: // normal call of c function pointer 272 gins(ACALL, N, f); 273 break; 274 275 case 1: // call in new proc (go) 276 case 2: // deferred call (defer) 277 nodreg(®, types[TINT32], D_CX); 278 gins(APUSHL, f, N); 279 nodconst(&con, types[TINT32], argsize(f->type)); 280 gins(APUSHL, &con, N); 281 if(proc == 1) 282 ginscall(newproc, 0); 283 else 284 ginscall(deferproc, 0); 285 gins(APOPL, N, ®); 286 gins(APOPL, N, ®); 287 if(proc == 2) { 288 nodreg(®, types[TINT64], D_AX); 289 gins(ATESTL, ®, ®); 290 p = gbranch(AJEQ, T, +1); 291 cgen_ret(N); 292 patch(p, pc); 293 } 294 break; 295 } 296 297 if(arg != -1) 298 gargsize(-1); 299 } 300 301 /* 302 * n is call to interface method. 303 * generate res = n. 304 */ 305 void 306 cgen_callinter(Node *n, Node *res, int proc) 307 { 308 Node *i, *f; 309 Node tmpi, nodi, nodo, nodr, nodsp; 310 311 i = n->left; 312 if(i->op != ODOTINTER) 313 fatal("cgen_callinter: not ODOTINTER %O", i->op); 314 315 f = i->right; // field 316 if(f->op != ONAME) 317 fatal("cgen_callinter: not ONAME %O", f->op); 318 319 i = i->left; // interface 320 321 if(!i->addable) { 322 tempname(&tmpi, i->type); 323 cgen(i, &tmpi); 324 i = &tmpi; 325 } 326 327 genlist(n->list); // assign the args 328 329 // i is now addable, prepare an indirected 330 // register to hold its address. 331 igen(i, &nodi, res); // REG = &inter 332 333 nodindreg(&nodsp, types[tptr], D_SP); 334 nodi.type = types[tptr]; 335 nodi.xoffset += widthptr; 336 cgen(&nodi, &nodsp); // 0(SP) = 4(REG) -- i.data 337 338 regalloc(&nodo, types[tptr], res); 339 nodi.type = types[tptr]; 340 nodi.xoffset -= widthptr; 341 cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab 342 regfree(&nodi); 343 344 regalloc(&nodr, types[tptr], &nodo); 345 if(n->left->xoffset == BADWIDTH) 346 fatal("cgen_callinter: badwidth"); 347 cgen_checknil(&nodo); 348 nodo.op = OINDREG; 349 nodo.xoffset = n->left->xoffset + 3*widthptr + 8; 350 351 if(proc == 0) { 352 // plain call: use direct c function pointer - more efficient 353 cgen(&nodo, &nodr); // REG = 20+offset(REG) -- i.tab->fun[f] 354 proc = 3; 355 } else { 356 // go/defer. generate go func value. 357 gins(ALEAL, &nodo, &nodr); // REG = &(20+offset(REG)) -- i.tab->fun[f] 358 } 359 360 nodr.type = n->left->type; 361 ginscall(&nodr, proc); 362 363 regfree(&nodr); 364 regfree(&nodo); 365 } 366 367 /* 368 * generate function call; 369 * proc=0 normal call 370 * proc=1 goroutine run in new proc 371 * proc=2 defer call save away stack 372 */ 373 void 374 cgen_call(Node *n, int proc) 375 { 376 Type *t; 377 Node nod, afun; 378 379 if(n == N) 380 return; 381 382 if(n->left->ullman >= UINF) { 383 // if name involves a fn call 384 // precompute the address of the fn 385 tempname(&afun, types[tptr]); 386 cgen(n->left, &afun); 387 } 388 389 genlist(n->list); // assign the args 390 t = n->left->type; 391 392 // call tempname pointer 393 if(n->left->ullman >= UINF) { 394 regalloc(&nod, types[tptr], N); 395 cgen_as(&nod, &afun); 396 nod.type = t; 397 ginscall(&nod, proc); 398 regfree(&nod); 399 return; 400 } 401 402 // call pointer 403 if(n->left->op != ONAME || n->left->class != PFUNC) { 404 regalloc(&nod, types[tptr], N); 405 cgen_as(&nod, n->left); 406 nod.type = t; 407 ginscall(&nod, proc); 408 regfree(&nod); 409 return; 410 } 411 412 // call direct 413 n->left->method = 1; 414 ginscall(n->left, proc); 415 } 416 417 /* 418 * call to n has already been generated. 419 * generate: 420 * res = return value from call. 421 */ 422 void 423 cgen_callret(Node *n, Node *res) 424 { 425 Node nod; 426 Type *fp, *t; 427 Iter flist; 428 429 t = n->left->type; 430 if(t->etype == TPTR32 || t->etype == TPTR64) 431 t = t->type; 432 433 fp = structfirst(&flist, getoutarg(t)); 434 if(fp == T) 435 fatal("cgen_callret: nil"); 436 437 memset(&nod, 0, sizeof(nod)); 438 nod.op = OINDREG; 439 nod.val.u.reg = D_SP; 440 nod.addable = 1; 441 442 nod.xoffset = fp->width; 443 nod.type = fp->type; 444 cgen_as(res, &nod); 445 } 446 447 /* 448 * call to n has already been generated. 449 * generate: 450 * res = &return value from call. 451 */ 452 void 453 cgen_aret(Node *n, Node *res) 454 { 455 Node nod1, nod2; 456 Type *fp, *t; 457 Iter flist; 458 459 t = n->left->type; 460 if(isptr[t->etype]) 461 t = t->type; 462 463 fp = structfirst(&flist, getoutarg(t)); 464 if(fp == T) 465 fatal("cgen_aret: nil"); 466 467 memset(&nod1, 0, sizeof(nod1)); 468 nod1.op = OINDREG; 469 nod1.val.u.reg = D_SP; 470 nod1.addable = 1; 471 472 nod1.xoffset = fp->width; 473 nod1.type = fp->type; 474 475 if(res->op != OREGISTER) { 476 regalloc(&nod2, types[tptr], res); 477 gins(ALEAL, &nod1, &nod2); 478 gins(AMOVL, &nod2, res); 479 regfree(&nod2); 480 } else 481 gins(ALEAL, &nod1, res); 482 } 483 484 /* 485 * generate return. 486 * n->left is assignments to return values. 487 */ 488 void 489 cgen_ret(Node *n) 490 { 491 Prog *p; 492 493 if(n != N) 494 genlist(n->list); // copy out args 495 if(hasdefer) 496 ginscall(deferreturn, 0); 497 genlist(curfn->exit); 498 p = gins(ARET, N, N); 499 if(n != N && n->op == ORETJMP) { 500 p->to.type = D_EXTERN; 501 p->to.sym = linksym(n->left->sym); 502 } 503 } 504 505 /* 506 * generate += *= etc. 507 */ 508 void 509 cgen_asop(Node *n) 510 { 511 Node n1, n2, n3, n4; 512 Node *nl, *nr; 513 Prog *p1; 514 Addr addr; 515 int a; 516 517 nl = n->left; 518 nr = n->right; 519 520 if(nr->ullman >= UINF && nl->ullman >= UINF) { 521 tempname(&n1, nr->type); 522 cgen(nr, &n1); 523 n2 = *n; 524 n2.right = &n1; 525 cgen_asop(&n2); 526 goto ret; 527 } 528 529 if(!isint[nl->type->etype]) 530 goto hard; 531 if(!isint[nr->type->etype]) 532 goto hard; 533 if(is64(nl->type) || is64(nr->type)) 534 goto hard; 535 536 switch(n->etype) { 537 case OADD: 538 if(smallintconst(nr)) 539 if(mpgetfix(nr->val.u.xval) == 1) { 540 a = optoas(OINC, nl->type); 541 if(nl->addable) { 542 gins(a, N, nl); 543 goto ret; 544 } 545 if(sudoaddable(a, nl, &addr)) { 546 p1 = gins(a, N, N); 547 p1->to = addr; 548 sudoclean(); 549 goto ret; 550 } 551 } 552 break; 553 554 case OSUB: 555 if(smallintconst(nr)) 556 if(mpgetfix(nr->val.u.xval) == 1) { 557 a = optoas(ODEC, nl->type); 558 if(nl->addable) { 559 gins(a, N, nl); 560 goto ret; 561 } 562 if(sudoaddable(a, nl, &addr)) { 563 p1 = gins(a, N, N); 564 p1->to = addr; 565 sudoclean(); 566 goto ret; 567 } 568 } 569 break; 570 } 571 572 switch(n->etype) { 573 case OADD: 574 case OSUB: 575 case OXOR: 576 case OAND: 577 case OOR: 578 a = optoas(n->etype, nl->type); 579 if(nl->addable) { 580 if(smallintconst(nr)) { 581 gins(a, nr, nl); 582 goto ret; 583 } 584 regalloc(&n2, nr->type, N); 585 cgen(nr, &n2); 586 gins(a, &n2, nl); 587 regfree(&n2); 588 goto ret; 589 } 590 if(nr->ullman < UINF) 591 if(sudoaddable(a, nl, &addr)) { 592 if(smallintconst(nr)) { 593 p1 = gins(a, nr, N); 594 p1->to = addr; 595 sudoclean(); 596 goto ret; 597 } 598 regalloc(&n2, nr->type, N); 599 cgen(nr, &n2); 600 p1 = gins(a, &n2, N); 601 p1->to = addr; 602 regfree(&n2); 603 sudoclean(); 604 goto ret; 605 } 606 } 607 608 hard: 609 n2.op = 0; 610 n1.op = 0; 611 if(nr->ullman >= nl->ullman || nl->addable) { 612 mgen(nr, &n2, N); 613 nr = &n2; 614 } else { 615 tempname(&n2, nr->type); 616 cgen(nr, &n2); 617 nr = &n2; 618 } 619 if(!nl->addable) { 620 igen(nl, &n1, N); 621 nl = &n1; 622 } 623 624 n3 = *n; 625 n3.left = nl; 626 n3.right = nr; 627 n3.op = n->etype; 628 629 mgen(&n3, &n4, N); 630 gmove(&n4, nl); 631 632 if(n1.op) 633 regfree(&n1); 634 mfree(&n2); 635 mfree(&n4); 636 637 ret: 638 ; 639 } 640 641 int 642 samereg(Node *a, Node *b) 643 { 644 if(a->op != OREGISTER) 645 return 0; 646 if(b->op != OREGISTER) 647 return 0; 648 if(a->val.u.reg != b->val.u.reg) 649 return 0; 650 return 1; 651 } 652 653 /* 654 * generate division. 655 * caller must set: 656 * ax = allocated AX register 657 * dx = allocated DX register 658 * generates one of: 659 * res = nl / nr 660 * res = nl % nr 661 * according to op. 662 */ 663 void 664 dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx) 665 { 666 int check; 667 Node n1, t1, t2, t3, t4, n4, nz; 668 Type *t, *t0; 669 Prog *p1, *p2; 670 671 // Have to be careful about handling 672 // most negative int divided by -1 correctly. 673 // The hardware will trap. 674 // Also the byte divide instruction needs AH, 675 // which we otherwise don't have to deal with. 676 // Easiest way to avoid for int8, int16: use int32. 677 // For int32 and int64, use explicit test. 678 // Could use int64 hw for int32. 679 t = nl->type; 680 t0 = t; 681 check = 0; 682 if(issigned[t->etype]) { 683 check = 1; 684 if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1)) 685 check = 0; 686 else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) 687 check = 0; 688 } 689 if(t->width < 4) { 690 if(issigned[t->etype]) 691 t = types[TINT32]; 692 else 693 t = types[TUINT32]; 694 check = 0; 695 } 696 697 tempname(&t1, t); 698 tempname(&t2, t); 699 if(t0 != t) { 700 tempname(&t3, t0); 701 tempname(&t4, t0); 702 cgen(nl, &t3); 703 cgen(nr, &t4); 704 // Convert. 705 gmove(&t3, &t1); 706 gmove(&t4, &t2); 707 } else { 708 cgen(nl, &t1); 709 cgen(nr, &t2); 710 } 711 712 if(!samereg(ax, res) && !samereg(dx, res)) 713 regalloc(&n1, t, res); 714 else 715 regalloc(&n1, t, N); 716 gmove(&t2, &n1); 717 gmove(&t1, ax); 718 p2 = P; 719 if(nacl) { 720 // Native Client does not relay the divide-by-zero trap 721 // to the executing program, so we must insert a check 722 // for ourselves. 723 nodconst(&n4, t, 0); 724 gins(optoas(OCMP, t), &n1, &n4); 725 p1 = gbranch(optoas(ONE, t), T, +1); 726 if(panicdiv == N) 727 panicdiv = sysfunc("panicdivide"); 728 ginscall(panicdiv, -1); 729 patch(p1, pc); 730 } 731 if(check) { 732 nodconst(&n4, t, -1); 733 gins(optoas(OCMP, t), &n1, &n4); 734 p1 = gbranch(optoas(ONE, t), T, +1); 735 if(op == ODIV) { 736 // a / (-1) is -a. 737 gins(optoas(OMINUS, t), N, ax); 738 gmove(ax, res); 739 } else { 740 // a % (-1) is 0. 741 nodconst(&n4, t, 0); 742 gmove(&n4, res); 743 } 744 p2 = gbranch(AJMP, T, 0); 745 patch(p1, pc); 746 } 747 if(!issigned[t->etype]) { 748 nodconst(&nz, t, 0); 749 gmove(&nz, dx); 750 } else 751 gins(optoas(OEXTEND, t), N, N); 752 gins(optoas(op, t), &n1, N); 753 regfree(&n1); 754 755 if(op == ODIV) 756 gmove(ax, res); 757 else 758 gmove(dx, res); 759 if(check) 760 patch(p2, pc); 761 } 762 763 static void 764 savex(int dr, Node *x, Node *oldx, Node *res, Type *t) 765 { 766 int r; 767 768 r = reg[dr]; 769 nodreg(x, types[TINT32], dr); 770 771 // save current ax and dx if they are live 772 // and not the destination 773 memset(oldx, 0, sizeof *oldx); 774 if(r > 0 && !samereg(x, res)) { 775 tempname(oldx, types[TINT32]); 776 gmove(x, oldx); 777 } 778 779 regalloc(x, t, x); 780 } 781 782 static void 783 restx(Node *x, Node *oldx) 784 { 785 regfree(x); 786 787 if(oldx->op != 0) { 788 x->type = types[TINT32]; 789 gmove(oldx, x); 790 } 791 } 792 793 /* 794 * generate division according to op, one of: 795 * res = nl / nr 796 * res = nl % nr 797 */ 798 void 799 cgen_div(int op, Node *nl, Node *nr, Node *res) 800 { 801 Node ax, dx, oldax, olddx; 802 Type *t; 803 804 if(is64(nl->type)) 805 fatal("cgen_div %T", nl->type); 806 807 if(issigned[nl->type->etype]) 808 t = types[TINT32]; 809 else 810 t = types[TUINT32]; 811 savex(D_AX, &ax, &oldax, res, t); 812 savex(D_DX, &dx, &olddx, res, t); 813 dodiv(op, nl, nr, res, &ax, &dx); 814 restx(&dx, &olddx); 815 restx(&ax, &oldax); 816 } 817 818 /* 819 * generate shift according to op, one of: 820 * res = nl << nr 821 * res = nl >> nr 822 */ 823 void 824 cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) 825 { 826 Node n1, n2, nt, cx, oldcx, hi, lo; 827 int a, w; 828 Prog *p1, *p2; 829 uvlong sc; 830 831 if(nl->type->width > 4) 832 fatal("cgen_shift %T", nl->type); 833 834 w = nl->type->width * 8; 835 836 a = optoas(op, nl->type); 837 838 if(nr->op == OLITERAL) { 839 tempname(&n2, nl->type); 840 cgen(nl, &n2); 841 regalloc(&n1, nl->type, res); 842 gmove(&n2, &n1); 843 sc = mpgetfix(nr->val.u.xval); 844 if(sc >= nl->type->width*8) { 845 // large shift gets 2 shifts by width-1 846 gins(a, ncon(w-1), &n1); 847 gins(a, ncon(w-1), &n1); 848 } else 849 gins(a, nr, &n1); 850 gmove(&n1, res); 851 regfree(&n1); 852 return; 853 } 854 855 memset(&oldcx, 0, sizeof oldcx); 856 nodreg(&cx, types[TUINT32], D_CX); 857 if(reg[D_CX] > 1 && !samereg(&cx, res)) { 858 tempname(&oldcx, types[TUINT32]); 859 gmove(&cx, &oldcx); 860 } 861 862 if(nr->type->width > 4) { 863 tempname(&nt, nr->type); 864 n1 = nt; 865 } else { 866 nodreg(&n1, types[TUINT32], D_CX); 867 regalloc(&n1, nr->type, &n1); // to hold the shift type in CX 868 } 869 870 if(samereg(&cx, res)) 871 regalloc(&n2, nl->type, N); 872 else 873 regalloc(&n2, nl->type, res); 874 if(nl->ullman >= nr->ullman) { 875 cgen(nl, &n2); 876 cgen(nr, &n1); 877 } else { 878 cgen(nr, &n1); 879 cgen(nl, &n2); 880 } 881 882 // test and fix up large shifts 883 if(bounded) { 884 if(nr->type->width > 4) { 885 // delayed reg alloc 886 nodreg(&n1, types[TUINT32], D_CX); 887 regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX 888 split64(&nt, &lo, &hi); 889 gmove(&lo, &n1); 890 splitclean(); 891 } 892 } else { 893 if(nr->type->width > 4) { 894 // delayed reg alloc 895 nodreg(&n1, types[TUINT32], D_CX); 896 regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX 897 split64(&nt, &lo, &hi); 898 gmove(&lo, &n1); 899 gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0)); 900 p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1); 901 gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w)); 902 p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); 903 splitclean(); 904 patch(p2, pc); 905 } else { 906 gins(optoas(OCMP, nr->type), &n1, ncon(w)); 907 p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); 908 } 909 if(op == ORSH && issigned[nl->type->etype]) { 910 gins(a, ncon(w-1), &n2); 911 } else { 912 gmove(ncon(0), &n2); 913 } 914 patch(p1, pc); 915 } 916 gins(a, &n1, &n2); 917 918 if(oldcx.op != 0) 919 gmove(&oldcx, &cx); 920 921 gmove(&n2, res); 922 923 regfree(&n1); 924 regfree(&n2); 925 } 926 927 /* 928 * generate byte multiply: 929 * res = nl * nr 930 * there is no 2-operand byte multiply instruction so 931 * we do a full-width multiplication and truncate afterwards. 932 */ 933 void 934 cgen_bmul(int op, Node *nl, Node *nr, Node *res) 935 { 936 Node n1, n2, nt, *tmp; 937 Type *t; 938 int a; 939 940 // copy from byte to full registers 941 t = types[TUINT32]; 942 if(issigned[nl->type->etype]) 943 t = types[TINT32]; 944 945 // largest ullman on left. 946 if(nl->ullman < nr->ullman) { 947 tmp = nl; 948 nl = nr; 949 nr = tmp; 950 } 951 952 tempname(&nt, nl->type); 953 cgen(nl, &nt); 954 regalloc(&n1, t, res); 955 cgen(nr, &n1); 956 regalloc(&n2, t, N); 957 gmove(&nt, &n2); 958 a = optoas(op, t); 959 gins(a, &n2, &n1); 960 regfree(&n2); 961 gmove(&n1, res); 962 regfree(&n1); 963 } 964 965 /* 966 * generate high multiply: 967 * res = (nl*nr) >> width 968 */ 969 void 970 cgen_hmul(Node *nl, Node *nr, Node *res) 971 { 972 Type *t; 973 int a; 974 Node n1, n2, ax, dx; 975 976 t = nl->type; 977 a = optoas(OHMUL, t); 978 // gen nl in n1. 979 tempname(&n1, t); 980 cgen(nl, &n1); 981 // gen nr in n2. 982 regalloc(&n2, t, res); 983 cgen(nr, &n2); 984 985 // multiply. 986 nodreg(&ax, t, D_AX); 987 gmove(&n2, &ax); 988 gins(a, &n1, N); 989 regfree(&n2); 990 991 if(t->width == 1) { 992 // byte multiply behaves differently. 993 nodreg(&ax, t, D_AH); 994 nodreg(&dx, t, D_DL); 995 gmove(&ax, &dx); 996 } 997 nodreg(&dx, t, D_DX); 998 gmove(&dx, res); 999 } 1000 1001 static void cgen_float387(Node *n, Node *res); 1002 static void cgen_floatsse(Node *n, Node *res); 1003 1004 /* 1005 * generate floating-point operation. 1006 */ 1007 void 1008 cgen_float(Node *n, Node *res) 1009 { 1010 Node *nl; 1011 Node n1, n2; 1012 Prog *p1, *p2, *p3; 1013 1014 nl = n->left; 1015 switch(n->op) { 1016 case OEQ: 1017 case ONE: 1018 case OLT: 1019 case OLE: 1020 case OGE: 1021 p1 = gbranch(AJMP, T, 0); 1022 p2 = pc; 1023 gmove(nodbool(1), res); 1024 p3 = gbranch(AJMP, T, 0); 1025 patch(p1, pc); 1026 bgen(n, 1, 0, p2); 1027 gmove(nodbool(0), res); 1028 patch(p3, pc); 1029 return; 1030 1031 case OPLUS: 1032 cgen(nl, res); 1033 return; 1034 1035 case OCONV: 1036 if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) { 1037 cgen(nl, res); 1038 return; 1039 } 1040 1041 tempname(&n2, n->type); 1042 mgen(nl, &n1, res); 1043 gmove(&n1, &n2); 1044 gmove(&n2, res); 1045 mfree(&n1); 1046 return; 1047 } 1048 1049 if(use_sse) 1050 cgen_floatsse(n, res); 1051 else 1052 cgen_float387(n, res); 1053 } 1054 1055 // floating-point. 387 (not SSE2) 1056 static void 1057 cgen_float387(Node *n, Node *res) 1058 { 1059 Node f0, f1; 1060 Node *nl, *nr; 1061 1062 nl = n->left; 1063 nr = n->right; 1064 nodreg(&f0, nl->type, D_F0); 1065 nodreg(&f1, n->type, D_F0+1); 1066 if(nr != N) 1067 goto flt2; 1068 1069 // unary 1070 cgen(nl, &f0); 1071 if(n->op != OCONV && n->op != OPLUS) 1072 gins(foptoas(n->op, n->type, 0), N, N); 1073 gmove(&f0, res); 1074 return; 1075 1076 flt2: // binary 1077 if(nl->ullman >= nr->ullman) { 1078 cgen(nl, &f0); 1079 if(nr->addable) 1080 gins(foptoas(n->op, n->type, 0), nr, &f0); 1081 else { 1082 cgen(nr, &f0); 1083 gins(foptoas(n->op, n->type, Fpop), &f0, &f1); 1084 } 1085 } else { 1086 cgen(nr, &f0); 1087 if(nl->addable) 1088 gins(foptoas(n->op, n->type, Frev), nl, &f0); 1089 else { 1090 cgen(nl, &f0); 1091 gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1); 1092 } 1093 } 1094 gmove(&f0, res); 1095 return; 1096 1097 } 1098 1099 static void 1100 cgen_floatsse(Node *n, Node *res) 1101 { 1102 Node *nl, *nr, *r; 1103 Node n1, n2, nt; 1104 int a; 1105 1106 nl = n->left; 1107 nr = n->right; 1108 switch(n->op) { 1109 default: 1110 dump("cgen_floatsse", n); 1111 fatal("cgen_floatsse %O", n->op); 1112 return; 1113 1114 case OMINUS: 1115 case OCOM: 1116 nr = nodintconst(-1); 1117 convlit(&nr, n->type); 1118 a = foptoas(OMUL, nl->type, 0); 1119 goto sbop; 1120 1121 // symmetric binary 1122 case OADD: 1123 case OMUL: 1124 a = foptoas(n->op, nl->type, 0); 1125 goto sbop; 1126 1127 // asymmetric binary 1128 case OSUB: 1129 case OMOD: 1130 case ODIV: 1131 a = foptoas(n->op, nl->type, 0); 1132 goto abop; 1133 } 1134 1135 sbop: // symmetric binary 1136 if(nl->ullman < nr->ullman || nl->op == OLITERAL) { 1137 r = nl; 1138 nl = nr; 1139 nr = r; 1140 } 1141 1142 abop: // asymmetric binary 1143 if(nl->ullman >= nr->ullman) { 1144 tempname(&nt, nl->type); 1145 cgen(nl, &nt); 1146 mgen(nr, &n2, N); 1147 regalloc(&n1, nl->type, res); 1148 gmove(&nt, &n1); 1149 gins(a, &n2, &n1); 1150 gmove(&n1, res); 1151 regfree(&n1); 1152 mfree(&n2); 1153 } else { 1154 regalloc(&n2, nr->type, res); 1155 cgen(nr, &n2); 1156 regalloc(&n1, nl->type, N); 1157 cgen(nl, &n1); 1158 gins(a, &n2, &n1); 1159 regfree(&n2); 1160 gmove(&n1, res); 1161 regfree(&n1); 1162 } 1163 return; 1164 } 1165 1166 void 1167 bgen_float(Node *n, int true, int likely, Prog *to) 1168 { 1169 int et, a; 1170 Node *nl, *nr, *r; 1171 Node n1, n2, n3, tmp, t1, t2, ax; 1172 Prog *p1, *p2; 1173 1174 nl = n->left; 1175 nr = n->right; 1176 a = n->op; 1177 if(!true) { 1178 // brcom is not valid on floats when NaN is involved. 1179 p1 = gbranch(AJMP, T, 0); 1180 p2 = gbranch(AJMP, T, 0); 1181 patch(p1, pc); 1182 // No need to avoid re-genning ninit. 1183 bgen_float(n, 1, -likely, p2); 1184 patch(gbranch(AJMP, T, 0), to); 1185 patch(p2, pc); 1186 return; 1187 } 1188 1189 if(use_sse) 1190 goto sse; 1191 else 1192 goto x87; 1193 1194 x87: 1195 a = brrev(a); // because the args are stacked 1196 if(a == OGE || a == OGT) { 1197 // only < and <= work right with NaN; reverse if needed 1198 r = nr; 1199 nr = nl; 1200 nl = r; 1201 a = brrev(a); 1202 } 1203 1204 nodreg(&tmp, nr->type, D_F0); 1205 nodreg(&n2, nr->type, D_F0 + 1); 1206 nodreg(&ax, types[TUINT16], D_AX); 1207 et = simsimtype(nr->type); 1208 if(et == TFLOAT64) { 1209 if(nl->ullman > nr->ullman) { 1210 cgen(nl, &tmp); 1211 cgen(nr, &tmp); 1212 gins(AFXCHD, &tmp, &n2); 1213 } else { 1214 cgen(nr, &tmp); 1215 cgen(nl, &tmp); 1216 } 1217 gins(AFUCOMIP, &tmp, &n2); 1218 gins(AFMOVDP, &tmp, &tmp); // annoying pop but still better than STSW+SAHF 1219 } else { 1220 // TODO(rsc): The moves back and forth to memory 1221 // here are for truncating the value to 32 bits. 1222 // This handles 32-bit comparison but presumably 1223 // all the other ops have the same problem. 1224 // We need to figure out what the right general 1225 // solution is, besides telling people to use float64. 1226 tempname(&t1, types[TFLOAT32]); 1227 tempname(&t2, types[TFLOAT32]); 1228 cgen(nr, &t1); 1229 cgen(nl, &t2); 1230 gmove(&t2, &tmp); 1231 gins(AFCOMFP, &t1, &tmp); 1232 gins(AFSTSW, N, &ax); 1233 gins(ASAHF, N, N); 1234 } 1235 1236 goto ret; 1237 1238 sse: 1239 if(!nl->addable) { 1240 tempname(&n1, nl->type); 1241 cgen(nl, &n1); 1242 nl = &n1; 1243 } 1244 if(!nr->addable) { 1245 tempname(&tmp, nr->type); 1246 cgen(nr, &tmp); 1247 nr = &tmp; 1248 } 1249 regalloc(&n2, nr->type, N); 1250 gmove(nr, &n2); 1251 nr = &n2; 1252 1253 if(nl->op != OREGISTER) { 1254 regalloc(&n3, nl->type, N); 1255 gmove(nl, &n3); 1256 nl = &n3; 1257 } 1258 1259 if(a == OGE || a == OGT) { 1260 // only < and <= work right with NaN; reverse if needed 1261 r = nr; 1262 nr = nl; 1263 nl = r; 1264 a = brrev(a); 1265 } 1266 1267 gins(foptoas(OCMP, nr->type, 0), nl, nr); 1268 if(nl->op == OREGISTER) 1269 regfree(nl); 1270 regfree(nr); 1271 1272 ret: 1273 if(a == OEQ) { 1274 // neither NE nor P 1275 p1 = gbranch(AJNE, T, -likely); 1276 p2 = gbranch(AJPS, T, -likely); 1277 patch(gbranch(AJMP, T, 0), to); 1278 patch(p1, pc); 1279 patch(p2, pc); 1280 } else if(a == ONE) { 1281 // either NE or P 1282 patch(gbranch(AJNE, T, likely), to); 1283 patch(gbranch(AJPS, T, likely), to); 1284 } else 1285 patch(gbranch(optoas(a, nr->type), T, likely), to); 1286 1287 } 1288 1289 // Called after regopt and peep have run. 1290 // Expand CHECKNIL pseudo-op into actual nil pointer check. 1291 void 1292 expandchecks(Prog *firstp) 1293 { 1294 Prog *p, *p1, *p2; 1295 1296 for(p = firstp; p != P; p = p->link) { 1297 if(p->as != ACHECKNIL) 1298 continue; 1299 if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers 1300 warnl(p->lineno, "generated nil check"); 1301 // check is 1302 // CMP arg, $0 1303 // JNE 2(PC) (likely) 1304 // MOV AX, 0 1305 p1 = mal(sizeof *p1); 1306 p2 = mal(sizeof *p2); 1307 clearp(p1); 1308 clearp(p2); 1309 p1->link = p2; 1310 p2->link = p->link; 1311 p->link = p1; 1312 p1->lineno = p->lineno; 1313 p2->lineno = p->lineno; 1314 p1->pc = 9999; 1315 p2->pc = 9999; 1316 p->as = ACMPL; 1317 p->to.type = D_CONST; 1318 p->to.offset = 0; 1319 p1->as = AJNE; 1320 p1->from.type = D_CONST; 1321 p1->from.offset = 1; // likely 1322 p1->to.type = D_BRANCH; 1323 p1->to.u.branch = p2->link; 1324 // crash by write to memory address 0. 1325 // if possible, since we know arg is 0, use 0(arg), 1326 // which will be shorter to encode than plain 0. 1327 p2->as = AMOVL; 1328 p2->from.type = D_AX; 1329 if(regtyp(&p->from)) 1330 p2->to.type = p->from.type + D_INDIR; 1331 else 1332 p2->to.type = D_INDIR+D_NONE; 1333 p2->to.offset = 0; 1334 } 1335 }