// github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/cmd/6g/ggen.c

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#undef	EXTERN
#define	EXTERN
#include <u.h>
#include <libc.h>
#include "gg.h"
#include "opt.h"

static Prog* appendp(Prog*, int, int, vlong, int, vlong);

void
defframe(Prog *ptxt, Bvec *bv)
{
	int i, j;
	uint32 frame;
	Prog *p;

	// fill in argument size
	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);

	// fill in final stack size
	ptxt->to.offset <<= 32;
	frame = rnd(stksize+maxarg, widthptr);
	ptxt->to.offset |= frame;

	// insert code to clear the pointered part of the frame,
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p = ptxt;
	if(stkzerosize >= 8*widthptr) {
		p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0);
		p = appendp(p, AMOVQ, D_CONST, stkzerosize/widthptr, D_CX, 0);
		p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0);
		p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
		appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
	} else {
		j = (stkptrsize - stkzerosize)/widthptr * 2;
		for(i=0; i<stkzerosize; i+=widthptr) {
			if(bvget(bv, j) || bvget(bv, j+1))
				p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i);
			j += 2;
		}
	}
}

static Prog*
appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
{
	Prog *q;

	q = mal(sizeof(*q));
	clearp(q);
	q->as = as;
	q->lineno = p->lineno;
	q->from.type = ftype;
	q->from.offset = foffset;
	q->to.type = ttype;
	q->to.offset = toffset;
	q->link = p->link;
	p->link = q;
	return q;
}

// Sweep the prog list to mark any used nodes.
void
markautoused(Prog* p)
{
	for (; p; p = p->link) {
		if (p->as == ATYPE)
			continue;

		if (p->from.type == D_AUTO && p->from.node)
			p->from.node->used = 1;

		if (p->to.type == D_AUTO && p->to.node)
			p->to.node->used = 1;
	}
}

// Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
void
fixautoused(Prog *p)
{
	Prog **lp;

	for (lp=&p; (p=*lp) != P; ) {
		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
			*lp = p->link;
			continue;
		}
		if (p->from.type == D_AUTO && p->from.node)
			p->from.offset += p->from.node->stkdelta;

		if (p->to.type == D_AUTO && p->to.node)
			p->to.offset += p->to.node->stkdelta;

		lp = &p->link;
	}
}

/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 */
void
ginscall(Node *f, int proc)
{
	int32 arg;
	Prog *p;
	Node reg, con;
	Node r1;

	if(f->type != T)
		setmaxarg(f->type);

	arg = -1;
	// Most functions have a fixed-size argument block, so traceback uses that during unwind.
	// Not all, though: there are some variadic functions in package runtime,
	// and for those we emit call-specific metadata recorded by the caller.
	// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
	// so we do this for all indirect calls as well.
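	// (Illustration: for a plain call the recorded size is just the
	// callee's fixed argument block; for go/defer, the two words
	// pushed below for newproc/deferproc -- the argument size and
	// the func value -- account for the extra 2*widthptr.)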
	if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
		arg = f->type->argwid;
		if(proc == 1 || proc == 2)
			arg += 2*widthptr;
	}

	if(arg != -1)
		gargsize(arg);

	switch(proc) {
	default:
		fatal("ginscall: bad proc %d", proc);
		break;

	case 0:	// normal call
	case -1:	// normal call but no return
		if(f->op == ONAME && f->class == PFUNC) {
			if(f == deferreturn) {
				// Deferred calls will appear to be returning to
				// the CALL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction byte before the return PC.
				// To avoid that being an unrelated instruction,
				// insert an x86 NOP that will have the right line number.
				// x86 NOP 0x90 is really XCHG AX, AX; use that description
				// because the NOP pseudo-instruction would be removed by
				// the linker.
				nodreg(&reg, types[TINT], D_AX);
				gins(AXCHGL, &reg, &reg);
			}
			p = gins(ACALL, N, f);
			afunclit(&p->to, f);
			if(proc == -1 || noreturn(p))
				gins(AUNDEF, N, N);
			break;
		}
		nodreg(&reg, types[tptr], D_DX);
		nodreg(&r1, types[tptr], D_BX);
		gmove(f, &reg);
		reg.op = OINDREG;
		gmove(&reg, &r1);
		reg.op = OREGISTER;
		gins(ACALL, &reg, &r1);
		break;

	case 3:	// normal call of c function pointer
		gins(ACALL, N, f);
		break;

	case 1:	// call in new proc (go)
	case 2:	// deferred call (defer)
		nodreg(&reg, types[TINT64], D_CX);
		if(flag_largemodel) {
			regalloc(&r1, f->type, f);
			gmove(f, &r1);
			gins(APUSHQ, &r1, N);
			regfree(&r1);
		} else {
			gins(APUSHQ, f, N);
		}
		nodconst(&con, types[TINT32], argsize(f->type));
		gins(APUSHQ, &con, N);
		if(proc == 1)
			ginscall(newproc, 0);
		else {
			if(!hasdefer)
				fatal("hasdefer=0 but has defer");
			ginscall(deferproc, 0);
		}
		gins(APOPQ, N, &reg);
		gins(APOPQ, N, &reg);
		if(proc == 2) {
			nodreg(&reg, types[TINT64], D_AX);
			gins(ATESTQ, &reg, &reg);
			patch(gbranch(AJNE, T, -1), retpc);
		}
		break;
	}

	if(arg != -1)
		gargsize(-1);
}

/*
 * n is call to interface method.
 * generate res = n.
 */
void
cgen_callinter(Node *n, Node *res, int proc)
{
	Node *i, *f;
	Node tmpi, nodi, nodo, nodr, nodsp;

	i = n->left;
	if(i->op != ODOTINTER)
		fatal("cgen_callinter: not ODOTINTER %O", i->op);

	f = i->right;		// field
	if(f->op != ONAME)
		fatal("cgen_callinter: not ONAME %O", f->op);

	i = i->left;		// interface

	if(!i->addable) {
		tempname(&tmpi, i->type);
		cgen(i, &tmpi);
		i = &tmpi;
	}

	genlist(n->list);		// assign the args

	// i is now addable, prepare an indirected
	// register to hold its address.
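	// (The interface value itself is two words: the itab pointer at
	// 0(REG) and the data word at widthptr(REG); the comments on the
	// moves below spell out which word is being copied where.)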
	igen(i, &nodi, res);		// REG = &inter

	nodindreg(&nodsp, types[tptr], D_SP);
	nodi.type = types[tptr];
	nodi.xoffset += widthptr;
	cgen(&nodi, &nodsp);	// 0(SP) = 8(REG) -- i.data

	regalloc(&nodo, types[tptr], res);
	nodi.type = types[tptr];
	nodi.xoffset -= widthptr;
	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
	regfree(&nodi);

	regalloc(&nodr, types[tptr], &nodo);
	if(n->left->xoffset == BADWIDTH)
		fatal("cgen_callinter: badwidth");
	cgen_checknil(&nodo);	// in case offset is huge
	nodo.op = OINDREG;
	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
	if(proc == 0) {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
		proc = 3;
	} else {
		// go/defer. generate go func value.
		gins(ALEAQ, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
	}

	nodr.type = n->left->type;
	ginscall(&nodr, proc);

	regfree(&nodr);
	regfree(&nodo);
}

/*
 * generate function call;
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 */
void
cgen_call(Node *n, int proc)
{
	Type *t;
	Node nod, afun;

	if(n == N)
		return;

	if(n->left->ullman >= UINF) {
		// if name involves a fn call
		// precompute the address of the fn
		tempname(&afun, types[tptr]);
		cgen(n->left, &afun);
	}

	genlist(n->list);		// assign the args
	t = n->left->type;

	// call tempname pointer
	if(n->left->ullman >= UINF) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, &afun);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call pointer
	if(n->left->op != ONAME || n->left->class != PFUNC) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, n->left);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call direct
	n->left->method = 1;
	ginscall(n->left, proc);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = return value from call.
 */
void
cgen_callret(Node *n, Node *res)
{
	Node nod;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(t->etype == TPTR32 || t->etype == TPTR64)
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_callret: nil");

	memset(&nod, 0, sizeof(nod));
	nod.op = OINDREG;
	nod.val.u.reg = D_SP;
	nod.addable = 1;

	nod.xoffset = fp->width;
	nod.type = fp->type;
	cgen_as(res, &nod);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = &return value from call.
 */
void
cgen_aret(Node *n, Node *res)
{
	Node nod1, nod2;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(isptr[t->etype])
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_aret: nil");

	memset(&nod1, 0, sizeof(nod1));
	nod1.op = OINDREG;
	nod1.val.u.reg = D_SP;
	nod1.addable = 1;

	nod1.xoffset = fp->width;
	nod1.type = fp->type;

	if(res->op != OREGISTER) {
		regalloc(&nod2, types[tptr], res);
		gins(ALEAQ, &nod1, &nod2);
		gins(AMOVQ, &nod2, res);
		regfree(&nod2);
	} else
		gins(ALEAQ, &nod1, res);
}

/*
 * generate return.
 * n->left is assignments to return values.
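 * If the function has deferred calls (or an explicit exit sequence),
 * jump to the shared epilogue at retpc instead of emitting ARET here.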
 */
void
cgen_ret(Node *n)
{
	Prog *p;

	genlist(n->list);		// copy out args
	if(hasdefer || curfn->exit) {
		gjmp(retpc);
		return;
	}
	p = gins(ARET, N, N);
	if(n->op == ORETJMP) {
		p->to.type = D_EXTERN;
		p->to.sym = n->left->sym;
	}
}

/*
 * generate += *= etc.
 */
void
cgen_asop(Node *n)
{
	Node n1, n2, n3, n4;
	Node *nl, *nr;
	Prog *p1;
	Addr addr;
	int a;

	nl = n->left;
	nr = n->right;

	if(nr->ullman >= UINF && nl->ullman >= UINF) {
		tempname(&n1, nr->type);
		cgen(nr, &n1);
		n2 = *n;
		n2.right = &n1;
		cgen_asop(&n2);
		goto ret;
	}

	if(!isint[nl->type->etype])
		goto hard;
	if(!isint[nr->type->etype])
		goto hard;

	switch(n->etype) {
	case OADD:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(OINC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;

	case OSUB:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(ODEC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;
	}

	switch(n->etype) {
	case OADD:
	case OSUB:
	case OXOR:
	case OAND:
	case OOR:
		a = optoas(n->etype, nl->type);
		if(nl->addable) {
			if(smallintconst(nr)) {
				gins(a, nr, nl);
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			gins(a, &n2, nl);
			regfree(&n2);
			goto ret;
		}
		if(nr->ullman < UINF)
		if(sudoaddable(a, nl, &addr)) {
			if(smallintconst(nr)) {
				p1 = gins(a, nr, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			p1 = gins(a, &n2, N);
			p1->to = addr;
			regfree(&n2);
			sudoclean();
			goto ret;
		}
	}

hard:
	n2.op = 0;
	n1.op = 0;
	if(nr->op == OLITERAL) {
		// don't allocate a register for literals.
	} else if(nr->ullman >= nl->ullman || nl->addable) {
		regalloc(&n2, nr->type, N);
		cgen(nr, &n2);
		nr = &n2;
	} else {
		tempname(&n2, nr->type);
		cgen(nr, &n2);
		nr = &n2;
	}
	if(!nl->addable) {
		igen(nl, &n1, N);
		nl = &n1;
	}

	n3 = *n;
	n3.left = nl;
	n3.right = nr;
	n3.op = n->etype;

	regalloc(&n4, nl->type, N);
	cgen(&n3, &n4);
	gmove(&n4, nl);

	if(n1.op)
		regfree(&n1);
	if(n2.op == OREGISTER)
		regfree(&n2);
	regfree(&n4);

ret:
	;
}

int
samereg(Node *a, Node *b)
{
	if(a == N || b == N)
		return 0;
	if(a->op != OREGISTER)
		return 0;
	if(b->op != OREGISTER)
		return 0;
	if(a->val.u.reg != b->val.u.reg)
		return 0;
	return 1;
}

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
void
dodiv(int op, Node *nl, Node *nr, Node *res)
{
	int a, check;
	Node n3, n4;
	Type *t, *t0;
	Node ax, dx, ax1, n31, oldax, olddx;
	Prog *p1, *p2;

	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
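	// (For example, with int64 operands (-1<<63) / -1 has no
	// representable result, so IDIV raises a divide error instead
	// of returning one.)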
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t = nl->type;
	t0 = t;
	check = 0;
	if(issigned[t->etype]) {
		check = 1;
		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
			check = 0;
		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
			check = 0;
	}
	if(t->width < 4) {
		if(issigned[t->etype])
			t = types[TINT32];
		else
			t = types[TUINT32];
		check = 0;
	}
	a = optoas(op, t);

	regalloc(&n3, t0, N);
	if(nl->ullman >= nr->ullman) {
		savex(D_AX, &ax, &oldax, res, t0);
		cgen(nl, &ax);
		regalloc(&ax, t0, &ax);	// mark ax live during cgen
		cgen(nr, &n3);
		regfree(&ax);
	} else {
		cgen(nr, &n3);
		savex(D_AX, &ax, &oldax, res, t0);
		cgen(nl, &ax);
	}
	if(t != t0) {
		// Convert
		ax1 = ax;
		n31 = n3;
		ax.type = t;
		n3.type = t;
		gmove(&ax1, &ax);
		gmove(&n31, &n3);
	}

	p2 = P;
	if(check) {
		nodconst(&n4, t, -1);
		gins(optoas(OCMP, t), &n3, &n4);
		p1 = gbranch(optoas(ONE, t), T, +1);
		if(op == ODIV) {
			// a / (-1) is -a.
			gins(optoas(OMINUS, t), N, &ax);
			gmove(&ax, res);
		} else {
			// a % (-1) is 0.
			nodconst(&n4, t, 0);
			gmove(&n4, res);
		}
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
	}
	savex(D_DX, &dx, &olddx, res, t);
	if(!issigned[t->etype]) {
		nodconst(&n4, t, 0);
		gmove(&n4, &dx);
	} else
		gins(optoas(OEXTEND, t), N, N);
	gins(a, &n3, N);
	regfree(&n3);
	if(op == ODIV)
		gmove(&ax, res);
	else
		gmove(&dx, res);
	restx(&dx, &olddx);
	if(check)
		patch(p2, pc);
	restx(&ax, &oldax);
}

/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it.  if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register.  caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
void
savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
{
	int r;

	r = reg[dr];

	// save current ax and dx if they are live
	// and not the destination
	memset(oldx, 0, sizeof *oldx);
	nodreg(x, t, dr);
	if(r > 1 && !samereg(x, res)) {
		regalloc(oldx, types[TINT64], N);
		x->type = types[TINT64];
		gmove(x, oldx);
		x->type = t;
		oldx->ostk = r;	// squirrel away old r value
		reg[dr] = 1;
	}
}

void
restx(Node *x, Node *oldx)
{
	if(oldx->op != 0) {
		x->type = types[TINT64];
		reg[x->val.u.reg] = oldx->ostk;
		gmove(oldx, x);
		regfree(oldx);
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
void
cgen_div(int op, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n3;
	int w, a;
	Magic m;

	if(nr->op != OLITERAL)
		goto longdiv;
	w = nl->type->width*8;

	// Front end handled 32-bit division. We only need to handle 64-bit.
	// try to do division by multiply by (2^w)/d
	// see hacker's delight chapter 10
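	// (Illustrative instance, not necessarily the constants umagic
	// picks: for uint64 n, n/3 == high64(n * 0xAAAAAAAAAAAAAAAB) >> 1,
	// where high64 is the upper word of the 128-bit product.)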
	switch(simtype[nl->type->etype]) {
	default:
		goto longdiv;

	case TUINT64:
		m.w = w;
		m.ud = mpgetfix(nr->val.u.xval);
		umagic(&m);
		if(m.bad)
			break;
		if(op == OMOD)
			goto longmod;

		cgenr(nl, &n1, N);
		nodconst(&n2, nl->type, m.um);
		regalloc(&n3, nl->type, res);
		cgen_hmul(&n1, &n2, &n3);

		if(m.ua) {
			// need to add numerator accounting for overflow
			gins(optoas(OADD, nl->type), &n1, &n3);
			nodconst(&n2, nl->type, 1);
			gins(optoas(ORROTC, nl->type), &n2, &n3);
			nodconst(&n2, nl->type, m.s-1);
			gins(optoas(ORSH, nl->type), &n2, &n3);
		} else {
			nodconst(&n2, nl->type, m.s);
			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
		}

		gmove(&n3, res);
		regfree(&n1);
		regfree(&n3);
		return;

	case TINT64:
		m.w = w;
		m.sd = mpgetfix(nr->val.u.xval);
		smagic(&m);
		if(m.bad)
			break;
		if(op == OMOD)
			goto longmod;

		cgenr(nl, &n1, res);
		nodconst(&n2, nl->type, m.sm);
		regalloc(&n3, nl->type, N);
		cgen_hmul(&n1, &n2, &n3);

		if(m.sm < 0) {
			// need to add numerator
			gins(optoas(OADD, nl->type), &n1, &n3);
		}

		nodconst(&n2, nl->type, m.s);
		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3

		nodconst(&n2, nl->type, w-1);
		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
		gins(optoas(OSUB, nl->type), &n1, &n3);	// added

		if(m.sd < 0) {
			// this could probably be removed
			// by factoring it into the multiplier
			gins(optoas(OMINUS, nl->type), N, &n3);
		}

		gmove(&n3, res);
		regfree(&n1);
		regfree(&n3);
		return;
	}
	goto longdiv;

longdiv:
	// division and mod using (slow) hardware instruction
	dodiv(op, nl, nr, res);
	return;

longmod:
	// mod using formula A%B = A-(A/B*B) but
	// we know that there is a fast algorithm for A/B
	regalloc(&n1, nl->type, res);
	cgen(nl, &n1);
	regalloc(&n2, nl->type, N);
	cgen_div(ODIV, &n1, nr, &n2);
	a = optoas(OMUL, nl->type);
	if(w == 8) {
		// use 2-operand 16-bit multiply
		// because there is no 2-operand 8-bit multiply
		a = AIMULW;
	}
	if(!smallintconst(nr)) {
		regalloc(&n3, nl->type, N);
		cgen(nr, &n3);
		gins(a, &n3, &n2);
		regfree(&n3);
	} else
		gins(a, nr, &n2);
	gins(optoas(OSUB, nl->type), &n2, &n1);
	gmove(&n1, res);
	regfree(&n1);
	regfree(&n2);
}

/*
 * generate high multiply:
 *	res = (nl*nr) >> width
 */
void
cgen_hmul(Node *nl, Node *nr, Node *res)
{
	Type *t;
	int a;
	Node n1, n2, ax, dx, *tmp;

	t = nl->type;
	a = optoas(OHMUL, t);
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}
	cgenr(nl, &n1, res);
	cgenr(nr, &n2, N);
	nodreg(&ax, t, D_AX);
	gmove(&n1, &ax);
	gins(a, &n2, N);
	regfree(&n2);
	regfree(&n1);

	if(t->width == 1) {
		// byte multiply behaves differently.
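		// An 8-bit multiply leaves the full 16-bit product in AX,
		// so the high byte ends up in AH rather than DX; copy it
		// into DL, where the move below expects it.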
		nodreg(&ax, t, D_AH);
		nodreg(&dx, t, D_DL);
		gmove(&ax, &dx);
	}
	nodreg(&dx, t, D_DX);
	gmove(&dx, res);
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
void
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n3, n4, n5, cx, oldcx;
	int a, rcx;
	Prog *p1;
	uvlong sc;
	Type *tcount;

	a = optoas(op, nl->type);

	if(nr->op == OLITERAL) {
		regalloc(&n1, nl->type, res);
		cgen(nl, &n1);
		sc = mpgetfix(nr->val.u.xval);
		if(sc >= nl->type->width*8) {
			// large shift gets 2 shifts by width-1
			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
			gins(a, &n3, &n1);
			gins(a, &n3, &n1);
		} else
			gins(a, nr, &n1);
		gmove(&n1, res);
		regfree(&n1);
		goto ret;
	}

	if(nl->ullman >= UINF) {
		tempname(&n4, nl->type);
		cgen(nl, &n4);
		nl = &n4;
	}
	if(nr->ullman >= UINF) {
		tempname(&n5, nr->type);
		cgen(nr, &n5);
		nr = &n5;
	}

	rcx = reg[D_CX];
	nodreg(&n1, types[TUINT32], D_CX);

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount = types[simtype[nr->type->etype]];
	if(tcount->etype < TUINT32)
		tcount = types[TUINT32];

	regalloc(&n1, nr->type, &n1);	// to hold the shift type in CX
	regalloc(&n3, tcount, &n1);	// to clear high bits of CX

	nodreg(&cx, types[TUINT64], D_CX);
	memset(&oldcx, 0, sizeof oldcx);
	if(rcx > 0 && !samereg(&cx, res)) {
		regalloc(&oldcx, types[TUINT64], N);
		gmove(&cx, &oldcx);
	}
	cx.type = tcount;

	if(samereg(&cx, res))
		regalloc(&n2, nl->type, N);
	else
		regalloc(&n2, nl->type, res);
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &n2);
		cgen(nr, &n1);
		gmove(&n1, &n3);
	} else {
		cgen(nr, &n1);
		gmove(&n1, &n3);
		cgen(nl, &n2);
	}
	regfree(&n3);

	// test and fix up large shifts
	if(!bounded) {
		nodconst(&n3, tcount, nl->type->width*8);
		gins(optoas(OCMP, tcount), &n1, &n3);
		p1 = gbranch(optoas(OLT, tcount), T, +1);
		if(op == ORSH && issigned[nl->type->etype]) {
			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
			gins(a, &n3, &n2);
		} else {
			nodconst(&n3, nl->type, 0);
			gmove(&n3, &n2);
		}
		patch(p1, pc);
	}

	gins(a, &n1, &n2);

	if(oldcx.op != 0) {
		cx.type = types[TUINT64];
		gmove(&oldcx, &cx);
		regfree(&oldcx);
	}

	gmove(&n2, res);

	regfree(&n1);
	regfree(&n2);

ret:
	;
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
void
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n1b, n2b, *tmp;
	Type *t;
	int a;

	// largest ullman on left.
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}

	// generate operands in "8-bit" registers.
	regalloc(&n1b, nl->type, res);
	cgen(nl, &n1b);
	regalloc(&n2b, nr->type, N);
	cgen(nr, &n2b);

	// perform full-width multiplication.
	t = types[TUINT64];
	if(issigned[nl->type->etype])
		t = types[TINT64];
	nodreg(&n1, t, n1b.val.u.reg);
	nodreg(&n2, t, n2b.val.u.reg);
	a = optoas(op, t);
	gins(a, &n2, &n1);

	// truncate.
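	// (n1 is just a 64-bit view of the register holding the product;
	// moving it into the byte-typed res keeps only the low 8 bits.)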
	gmove(&n1, res);
	regfree(&n1b);
	regfree(&n2b);
}

void
clearfat(Node *nl)
{
	int64 w, c, q;
	Node n1, oldn1, ax, oldax;

	/* clear a fat object */
	if(debug['g'])
		dump("\nclearfat", nl);

	w = nl->type->width;
	// Avoid taking the address for simple enough types.
	if(componentgen(N, nl))
		return;

	c = w % 8;	// bytes
	q = w / 8;	// quads

	savex(D_DI, &n1, &oldn1, N, types[tptr]);
	agen(nl, &n1);

	savex(D_AX, &ax, &oldax, N, types[tptr]);
	gconreg(AMOVQ, 0, D_AX);

	if(q >= 4) {
		gconreg(AMOVQ, q, D_CX);
		gins(AREP, N, N);	// repeat
		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
	} else
	while(q > 0) {
		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
		q--;
	}

	if(c >= 4) {
		gconreg(AMOVQ, c, D_CX);
		gins(AREP, N, N);	// repeat
		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
	} else
	while(c > 0) {
		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
		c--;
	}

	restx(&n1, &oldn1);
	restx(&ax, &oldax);
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
void
expandchecks(Prog *firstp)
{
	Prog *p, *p1, *p2;

	for(p = firstp; p != P; p = p->link) {
		if(p->as != ACHECKNIL)
			continue;
		if(debug_checknil && p->lineno > 1)	// p->lineno==1 in generated wrappers
			warnl(p->lineno, "generated nil check");
		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = mal(sizeof *p1);
		p2 = mal(sizeof *p2);
		clearp(p1);
		clearp(p2);
		p1->link = p2;
		p2->link = p->link;
		p->link = p1;
		p1->lineno = p->lineno;
		p2->lineno = p->lineno;
		p1->loc = 9999;
		p2->loc = 9999;
		p->as = ACMPQ;
		p->to.type = D_CONST;
		p->to.offset = 0;
		p1->as = AJNE;
		p1->from.type = D_CONST;
		p1->from.offset = 1;	// likely
		p1->to.type = D_BRANCH;
		p1->to.u.branch = p2->link;
		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2->as = AMOVL;
		p2->from.type = D_AX;
		if(regtyp(&p->from))
			p2->to.type = p->from.type + D_INDIR;
		else
			p2->to.type = D_INDIR+D_NONE;
		p2->to.offset = 0;
	}
}