github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/cmd/6g/ggen.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #undef EXTERN 6 #define EXTERN 7 #include <u.h> 8 #include <libc.h> 9 #include "gg.h" 10 #include "opt.h" 11 12 static Prog *appendpp(Prog*, int, int, vlong, int, vlong); 13 static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); 14 15 void 16 defframe(Prog *ptxt) 17 { 18 uint32 frame, ax; 19 Prog *p; 20 vlong hi, lo; 21 NodeList *l; 22 Node *n; 23 24 // fill in argument size 25 ptxt->to.offset = rnd(curfn->type->argwid, widthptr); 26 27 // fill in final stack size 28 ptxt->to.offset <<= 32; 29 frame = rnd(stksize+maxarg, widthreg); 30 ptxt->to.offset |= frame; 31 32 // insert code to zero ambiguously live variables 33 // so that the garbage collector only sees initialized values 34 // when it looks for pointers. 35 p = ptxt; 36 lo = hi = 0; 37 ax = 0; 38 // iterate through declarations - they are sorted in decreasing xoffset order. 
39 for(l=curfn->dcl; l != nil; l = l->next) { 40 n = l->n; 41 if(!n->needzero) 42 continue; 43 if(n->class != PAUTO) 44 fatal("needzero class %d", n->class); 45 if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) 46 fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); 47 48 if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) { 49 // merge with range we already have 50 lo = n->xoffset; 51 continue; 52 } 53 // zero old range 54 p = zerorange(p, frame, lo, hi, &ax); 55 56 // set new range 57 hi = n->xoffset + n->type->width; 58 lo = n->xoffset; 59 } 60 // zero final range 61 zerorange(p, frame, lo, hi, &ax); 62 } 63 64 static Prog* 65 zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax) 66 { 67 vlong cnt, i; 68 69 cnt = hi - lo; 70 if(cnt == 0) 71 return p; 72 if(*ax == 0) { 73 p = appendpp(p, AMOVQ, D_CONST, 0, D_AX, 0); 74 *ax = 1; 75 } 76 if(cnt % widthreg != 0) { 77 // should only happen with nacl 78 if(cnt % widthptr != 0) 79 fatal("zerorange count not a multiple of widthptr %d", cnt); 80 p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo); 81 lo += widthptr; 82 cnt -= widthptr; 83 } 84 if(cnt <= 4*widthreg) { 85 for(i = 0; i < cnt; i += widthreg) { 86 p = appendpp(p, AMOVQ, D_AX, 0, D_SP+D_INDIR, frame+lo+i); 87 } 88 } else if(!nacl && (cnt <= 128*widthreg)) { 89 p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0); 90 p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 2*(128-cnt/widthreg)); 91 p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); 92 } else { 93 p = appendpp(p, AMOVQ, D_CONST, cnt/widthreg, D_CX, 0); 94 p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0); 95 p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0); 96 p = appendpp(p, ASTOSQ, D_NONE, 0, D_NONE, 0); 97 } 98 return p; 99 } 100 101 static Prog* 102 appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset) 103 { 104 Prog *q; 105 q = mal(sizeof(*q)); 106 clearp(q); 107 q->as = as; 
108 q->lineno = p->lineno; 109 q->from.type = ftype; 110 q->from.offset = foffset; 111 q->to.type = ttype; 112 q->to.offset = toffset; 113 q->link = p->link; 114 p->link = q; 115 return q; 116 } 117 118 // Sweep the prog list to mark any used nodes. 119 void 120 markautoused(Prog* p) 121 { 122 for (; p; p = p->link) { 123 if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL) 124 continue; 125 126 if (p->from.node) 127 p->from.node->used = 1; 128 129 if (p->to.node) 130 p->to.node->used = 1; 131 } 132 } 133 134 // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 135 void 136 fixautoused(Prog *p) 137 { 138 Prog **lp; 139 140 for (lp=&p; (p=*lp) != P; ) { 141 if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { 142 *lp = p->link; 143 continue; 144 } 145 if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) { 146 // Cannot remove VARDEF instruction, because - unlike TYPE handled above - 147 // VARDEFs are interspersed with other code, and a jump might be using the 148 // VARDEF as a target. Replace with a no-op instead. A later pass will remove 149 // the no-ops. 
150 p->to.type = D_NONE; 151 p->to.node = N; 152 p->as = ANOP; 153 continue; 154 } 155 if (p->from.type == D_AUTO && p->from.node) 156 p->from.offset += p->from.node->stkdelta; 157 158 if (p->to.type == D_AUTO && p->to.node) 159 p->to.offset += p->to.node->stkdelta; 160 161 lp = &p->link; 162 } 163 } 164 165 166 /* 167 * generate: 168 * call f 169 * proc=-1 normal call but no return 170 * proc=0 normal call 171 * proc=1 goroutine run in new proc 172 * proc=2 defer call save away stack 173 * proc=3 normal call to C pointer (not Go func value) 174 */ 175 void 176 ginscall(Node *f, int proc) 177 { 178 int32 arg; 179 Prog *p; 180 Node reg, con; 181 Node r1; 182 183 if(f->type != T) 184 setmaxarg(f->type); 185 186 arg = -1; 187 // Most functions have a fixed-size argument block, so traceback uses that during unwind. 188 // Not all, though: there are some variadic functions in package runtime, 189 // and for those we emit call-specific metadata recorded by caller. 190 // Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub), 191 // so we do this for all indirect calls as well. 192 if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) { 193 arg = f->type->argwid; 194 if(proc == 1 || proc == 2) 195 arg += 2*widthptr; 196 } 197 198 if(arg != -1) 199 gargsize(arg); 200 201 switch(proc) { 202 default: 203 fatal("ginscall: bad proc %d", proc); 204 break; 205 206 case 0: // normal call 207 case -1: // normal call but no return 208 if(f->op == ONAME && f->class == PFUNC) { 209 if(f == deferreturn) { 210 // Deferred calls will appear to be returning to 211 // the CALL deferreturn(SB) that we are about to emit. 212 // However, the stack trace code will show the line 213 // of the instruction byte before the return PC. 214 // To avoid that being an unrelated instruction, 215 // insert an x86 NOP that we will have the right line number. 
216 // x86 NOP 0x90 is really XCHG AX, AX; use that description 217 // because the NOP pseudo-instruction would be removed by 218 // the linker. 219 nodreg(®, types[TINT], D_AX); 220 gins(AXCHGL, ®, ®); 221 } 222 p = gins(ACALL, N, f); 223 afunclit(&p->to, f); 224 if(proc == -1 || noreturn(p)) 225 gins(AUNDEF, N, N); 226 break; 227 } 228 nodreg(®, types[tptr], D_DX); 229 nodreg(&r1, types[tptr], D_BX); 230 gmove(f, ®); 231 reg.op = OINDREG; 232 gmove(®, &r1); 233 reg.op = OREGISTER; 234 gins(ACALL, ®, &r1); 235 break; 236 237 case 3: // normal call of c function pointer 238 gins(ACALL, N, f); 239 break; 240 241 case 1: // call in new proc (go) 242 case 2: // deferred call (defer) 243 nodconst(&con, types[TINT64], argsize(f->type)); 244 if(widthptr == 4) { 245 nodreg(&r1, types[TINT32], D_CX); 246 gmove(f, &r1); 247 nodreg(®, types[TINT64], D_CX); 248 nodconst(&r1, types[TINT64], 32); 249 gins(ASHLQ, &r1, ®); 250 gins(AORQ, &con, ®); 251 gins(APUSHQ, ®, N); 252 } else { 253 nodreg(®, types[TINT64], D_CX); 254 gmove(f, ®); 255 gins(APUSHQ, ®, N); 256 gins(APUSHQ, &con, N); 257 } 258 if(proc == 1) 259 ginscall(newproc, 0); 260 else { 261 if(!hasdefer) 262 fatal("hasdefer=0 but has defer"); 263 ginscall(deferproc, 0); 264 } 265 nodreg(®, types[TINT64], D_CX); 266 gins(APOPQ, N, ®); 267 if(widthptr == 8) 268 gins(APOPQ, N, ®); 269 if(proc == 2) { 270 nodreg(®, types[TINT64], D_AX); 271 gins(ATESTQ, ®, ®); 272 p = gbranch(AJEQ, T, +1); 273 cgen_ret(N); 274 patch(p, pc); 275 } 276 break; 277 } 278 279 if(arg != -1) 280 gargsize(-1); 281 } 282 283 /* 284 * n is call to interface method. 285 * generate res = n. 
/*
 * n is call to interface method.
 * generate res = n.
 */
void
cgen_callinter(Node *n, Node *res, int proc)
{
	Node *i, *f;
	Node tmpi, nodi, nodo, nodr, nodsp;

	i = n->left;
	if(i->op != ODOTINTER)
		fatal("cgen_callinter: not ODOTINTER %O", i->op);

	f = i->right;		// field
	if(f->op != ONAME)
		fatal("cgen_callinter: not ONAME %O", f->op);

	i = i->left;		// interface

	// Materialize the interface value in a temporary if it is not
	// directly addressable, so we can take its address below.
	if(!i->addable) {
		tempname(&tmpi, i->type);
		cgen(i, &tmpi);
		i = &tmpi;
	}

	genlist(n->list);		// assign the args

	// i is now addable, prepare an indirected
	// register to hold its address.
	igen(i, &nodi, res);		// REG = &inter

	nodindreg(&nodsp, types[tptr], D_SP);
	nodi.type = types[tptr];
	nodi.xoffset += widthptr;
	cgen(&nodi, &nodsp);	// 0(SP) = 8(REG) -- i.data

	regalloc(&nodo, types[tptr], res);
	nodi.type = types[tptr];
	nodi.xoffset -= widthptr;
	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
	regfree(&nodi);

	regalloc(&nodr, types[tptr], &nodo);
	if(n->left->xoffset == BADWIDTH)
		fatal("cgen_callinter: badwidth");
	cgen_checknil(&nodo); // in case offset is huge
	nodo.op = OINDREG;
	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
	if(proc == 0) {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
		proc = 3;
	} else {
		// go/defer. generate go func value.
		gins(ALEAQ, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
	}

	nodr.type = n->left->type;
	ginscall(&nodr, proc);

	regfree(&nodr);
	regfree(&nodo);
}

/*
 * generate function call;
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 */
void
cgen_call(Node *n, int proc)
{
	Type *t;
	Node nod, afun;

	if(n == N)
		return;

	if(n->left->ullman >= UINF) {
		// if name involves a fn call
		// precompute the address of the fn
		tempname(&afun, types[tptr]);
		cgen(n->left, &afun);
	}

	genlist(n->list);		// assign the args
	t = n->left->type;

	// call tempname pointer
	if(n->left->ullman >= UINF) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, &afun);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call pointer
	if(n->left->op != ONAME || n->left->class != PFUNC) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, n->left);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call direct
	n->left->method = 1;
	ginscall(n->left, proc);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = return value from call.
 */
void
cgen_callret(Node *n, Node *res)
{
	Node nod;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(t->etype == TPTR32 || t->etype == TPTR64)
		t = t->type;

	// First result lives in the out-args section of the caller's frame.
	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_callret: nil");

	// Build an SP-relative indirect node addressing the result slot.
	memset(&nod, 0, sizeof(nod));
	nod.op = OINDREG;
	nod.val.u.reg = D_SP;
	nod.addable = 1;

	nod.xoffset = fp->width;
	nod.type = fp->type;
	cgen_as(res, &nod);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = &return value from call.
 */
void
cgen_aret(Node *n, Node *res)
{
	Node nod1, nod2;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(isptr[t->etype])
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_aret: nil");

	// SP-relative node addressing the first result slot.
	memset(&nod1, 0, sizeof(nod1));
	nod1.op = OINDREG;
	nod1.val.u.reg = D_SP;
	nod1.addable = 1;

	nod1.xoffset = fp->width;
	nod1.type = fp->type;

	if(res->op != OREGISTER) {
		// res is in memory: compute the address into a scratch
		// register first, then store the pointer.
		regalloc(&nod2, types[tptr], res);
		gins(leaptr, &nod1, &nod2);
		gins(movptr, &nod2, res);
		regfree(&nod2);
	} else
		gins(leaptr, &nod1, res);
}

/*
 * generate return.
 * n->left is assignments to return values.
 */
void
cgen_ret(Node *n)
{
	Prog *p;

	if(n != N)
		genlist(n->list);		// copy out args
	if(hasdefer)
		ginscall(deferreturn, 0);
	genlist(curfn->exit);
	p = gins(ARET, N, N);
	if(n != N && n->op == ORETJMP) {
		// RETJMP: tail-jump to the named function instead of returning.
		p->to.type = D_EXTERN;
		p->to.sym = linksym(n->left->sym);
	}
}
/*
 * generate += *= etc.
 */
void
cgen_asop(Node *n)
{
	Node n1, n2, n3, n4;
	Node *nl, *nr;
	Prog *p1;
	Addr addr;
	int a;

	nl = n->left;
	nr = n->right;

	if(nr->ullman >= UINF && nl->ullman >= UINF) {
		// Both sides contain function calls: evaluate the right side
		// into a temporary first, then retry with the simpler form.
		tempname(&n1, nr->type);
		cgen(nr, &n1);
		n2 = *n;
		n2.right = &n1;
		cgen_asop(&n2);
		goto ret;
	}

	if(!isint[nl->type->etype])
		goto hard;
	if(!isint[nr->type->etype])
		goto hard;

	// Special-case x += 1 and x -= 1 as INC/DEC.
	switch(n->etype) {
	case OADD:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(OINC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;

	case OSUB:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(ODEC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;
	}

	// Integer op= with a directly addressable (or sudoaddable)
	// left operand: emit a single read-modify-write instruction.
	switch(n->etype) {
	case OADD:
	case OSUB:
	case OXOR:
	case OAND:
	case OOR:
		a = optoas(n->etype, nl->type);
		if(nl->addable) {
			if(smallintconst(nr)) {
				gins(a, nr, nl);
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			gins(a, &n2, nl);
			regfree(&n2);
			goto ret;
		}
		if(nr->ullman < UINF)
		if(sudoaddable(a, nl, &addr)) {
			if(smallintconst(nr)) {
				p1 = gins(a, nr, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			p1 = gins(a, &n2, N);
			p1->to = addr;
			regfree(&n2);
			sudoclean();
			goto ret;
		}
	}

hard:
	// General case: evaluate both operands, compute nl OP nr into a
	// register, and store back to nl.
	n2.op = 0;
	n1.op = 0;
	if(nr->op == OLITERAL) {
		// don't allocate a register for literals.
	} else if(nr->ullman >= nl->ullman || nl->addable) {
		regalloc(&n2, nr->type, N);
		cgen(nr, &n2);
		nr = &n2;
	} else {
		tempname(&n2, nr->type);
		cgen(nr, &n2);
		nr = &n2;
	}
	if(!nl->addable) {
		igen(nl, &n1, N);
		nl = &n1;
	}

	n3 = *n;
	n3.left = nl;
	n3.right = nr;
	n3.op = n->etype;

	regalloc(&n4, nl->type, N);
	cgen(&n3, &n4);
	gmove(&n4, nl);

	if(n1.op)
		regfree(&n1);
	if(n2.op == OREGISTER)
		regfree(&n2);
	regfree(&n4);

ret:
	;
}

// samereg reports whether a and b are both register nodes
// referring to the same machine register.
int
samereg(Node *a, Node *b)
{
	if(a == N || b == N)
		return 0;
	if(a->op != OREGISTER)
		return 0;
	if(b->op != OREGISTER)
		return 0;
	if(a->val.u.reg != b->val.u.reg)
		return 0;
	return 1;
}

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
void
dodiv(int op, Node *nl, Node *nr, Node *res)
{
	int a, check;
	Node n3, n4;
	Type *t, *t0;
	Node ax, dx, ax1, n31, oldax, olddx;
	Prog *p1, *p2;

	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t = nl->type;
	t0 = t;
	check = 0;
	if(issigned[t->etype]) {
		check = 1;
		// The check can be skipped when a constant operand proves
		// the MinInt / -1 case cannot occur.
		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
			check = 0;
		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
			check = 0;
	}
	if(t->width < 4) {
		// Widen int8/int16 division to 32 bits (avoids AH handling).
		if(issigned[t->etype])
			t = types[TINT32];
		else
			t = types[TUINT32];
		check = 0;
	}
	a = optoas(op, t);

	// Evaluate numerator into AX and divisor into n3, in the order
	// that keeps AX live for the shorter span.
	regalloc(&n3, t0, N);
	if(nl->ullman >= nr->ullman) {
		savex(D_AX, &ax, &oldax, res, t0);
		cgen(nl, &ax);
		regalloc(&ax, t0, &ax);	// mark ax live during cgen
		cgen(nr, &n3);
		regfree(&ax);
	} else {
		cgen(nr, &n3);
		savex(D_AX, &ax, &oldax, res, t0);
		cgen(nl, &ax);
	}
	if(t != t0) {
		// Convert
		ax1 = ax;
		n31 = n3;
		ax.type = t;
		n3.type = t;
		gmove(&ax1, &ax);
		gmove(&n31, &n3);
	}

	p2 = P;
	if(nacl) {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		nodconst(&n4, t, 0);
		gins(optoas(OCMP, t), &n3, &n4);
		p1 = gbranch(optoas(ONE, t), T, +1);
		if(panicdiv == N)
			panicdiv = sysfunc("panicdivide");
		ginscall(panicdiv, -1);
		patch(p1, pc);
	}
	if(check) {
		// Divisor == -1: handle explicitly to avoid the hardware
		// trap on MinInt / -1.
		nodconst(&n4, t, -1);
		gins(optoas(OCMP, t), &n3, &n4);
		p1 = gbranch(optoas(ONE, t), T, +1);
		if(op == ODIV) {
			// a / (-1) is -a.
			gins(optoas(OMINUS, t), N, &ax);
			gmove(&ax, res);
		} else {
			// a % (-1) is 0.
			nodconst(&n4, t, 0);
			gmove(&n4, res);
		}
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
	}
	savex(D_DX, &dx, &olddx, res, t);
	if(!issigned[t->etype]) {
		// Unsigned: zero DX before DIV.
		nodconst(&n4, t, 0);
		gmove(&n4, &dx);
	} else
		gins(optoas(OEXTEND, t), N, N);	// sign-extend AX into DX (CDQ/CQO)
	gins(a, &n3, N);
	regfree(&n3);
	if(op == ODIV)
		gmove(&ax, res);	// quotient in AX
	else
		gmove(&dx, res);	// remainder in DX
	restx(&dx, &olddx);
	if(check)
		patch(p2, pc);
	restx(&ax, &oldax);
}

/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it. if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register. caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
void
savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
{
	int r;

	r = reg[dr];

	// save current ax and dx if they are live
	// and not the destination
	memset(oldx, 0, sizeof *oldx);
	nodreg(x, t, dr);
	if(r > 1 && !samereg(x, res)) {
		regalloc(oldx, types[TINT64], N);
		x->type = types[TINT64];
		gmove(x, oldx);
		x->type = t;
		oldx->ostk = r; // squirrel away old r value
		reg[dr] = 1;
	}
}

// restx undoes savex: if oldx holds a saved copy (oldx->op != 0),
// move the value back into the special register and restore the
// saved allocation count.
void
restx(Node *x, Node *oldx)
{
	if(oldx->op != 0) {
		x->type = types[TINT64];
		reg[x->val.u.reg] = oldx->ostk;
		gmove(oldx, x);
		regfree(oldx);
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
void
cgen_div(int op, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n3;
	int w, a;
	Magic m;

	if(nr->op != OLITERAL)
		goto longdiv;
	w = nl->type->width*8;

	// Front end handled 32-bit division. We only need to handle 64-bit.
	// try to do division by multiply by (2^w)/d
	// see hacker's delight chapter 10
	switch(simtype[nl->type->etype]) {
	default:
		goto longdiv;

	case TUINT64:
		m.w = w;
		m.ud = mpgetfix(nr->val.u.xval);
		umagic(&m);
		if(m.bad)
			break;
		if(op == OMOD)
			goto longmod;

		cgenr(nl, &n1, N);
		nodconst(&n2, nl->type, m.um);
		regalloc(&n3, nl->type, res);
		cgen_hmul(&n1, &n2, &n3);

		if(m.ua) {
			// need to add numerator accounting for overflow
			gins(optoas(OADD, nl->type), &n1, &n3);
			nodconst(&n2, nl->type, 1);
			gins(optoas(ORROTC, nl->type), &n2, &n3);
			nodconst(&n2, nl->type, m.s-1);
			gins(optoas(ORSH, nl->type), &n2, &n3);
		} else {
			nodconst(&n2, nl->type, m.s);
			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
		}

		gmove(&n3, res);
		regfree(&n1);
		regfree(&n3);
		return;

	case TINT64:
		m.w = w;
		m.sd = mpgetfix(nr->val.u.xval);
		smagic(&m);
		if(m.bad)
			break;
		if(op == OMOD)
			goto longmod;

		cgenr(nl, &n1, res);
		nodconst(&n2, nl->type, m.sm);
		regalloc(&n3, nl->type, N);
		cgen_hmul(&n1, &n2, &n3);

		if(m.sm < 0) {
			// need to add numerator
			gins(optoas(OADD, nl->type), &n1, &n3);
		}

		nodconst(&n2, nl->type, m.s);
		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3

		nodconst(&n2, nl->type, w-1);
		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
		gins(optoas(OSUB, nl->type), &n1, &n3);	// added

		if(m.sd < 0) {
			// this could probably be removed
			// by factoring it into the multiplier
			gins(optoas(OMINUS, nl->type), N, &n3);
		}

		gmove(&n3, res);
		regfree(&n1);
		regfree(&n3);
		return;
	}
	goto longdiv;

longdiv:
	// division and mod using (slow) hardware instruction
	dodiv(op, nl, nr, res);
	return;

longmod:
	// mod using formula A%B = A-(A/B*B) but
	// we know that there is a fast algorithm for A/B
	regalloc(&n1, nl->type, res);
	cgen(nl, &n1);
	regalloc(&n2, nl->type, N);
	cgen_div(ODIV, &n1, nr, &n2);
	a = optoas(OMUL, nl->type);
	if(w == 8) {
		// use 2-operand 16-bit multiply
		// because there is no 2-operand 8-bit multiply
		a = AIMULW;
	}
	if(!smallintconst(nr)) {
		regalloc(&n3, nl->type, N);
		cgen(nr, &n3);
		gins(a, &n3, &n2);
		regfree(&n3);
	} else
		gins(a, nr, &n2);
	gins(optoas(OSUB, nl->type), &n2, &n1);
	gmove(&n1, res);
	regfree(&n1);
	regfree(&n2);
}

/*
 * generate high multiply:
 *	res = (nl*nr) >> width
 */
void
cgen_hmul(Node *nl, Node *nr, Node *res)
{
	Type *t;
	int a;
	Node n1, n2, ax, dx, *tmp;

	t = nl->type;
	a = optoas(OHMUL, t);
	// largest ullman on left.
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}
	cgenr(nl, &n1, res);
	cgenr(nr, &n2, N);
	nodreg(&ax, t, D_AX);
	gmove(&n1, &ax);
	gins(a, &n2, N);	// one-operand MUL/IMUL: result high bits land in DX
	regfree(&n2);
	regfree(&n1);

	if(t->width == 1) {
		// byte multiply behaves differently.
		nodreg(&ax, t, D_AH);
		nodreg(&dx, t, D_DL);
		gmove(&ax, &dx);
	}
	nodreg(&dx, t, D_DX);
	gmove(&dx, res);
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
void
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n3, n4, n5, cx, oldcx;
	int a, rcx;
	Prog *p1;
	uvlong sc;
	Type *tcount;

	a = optoas(op, nl->type);

	if(nr->op == OLITERAL) {
		// Constant shift count: no CX juggling needed.
		regalloc(&n1, nl->type, res);
		cgen(nl, &n1);
		sc = mpgetfix(nr->val.u.xval);
		if(sc >= nl->type->width*8) {
			// large shift gets 2 shifts by width-1
			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
			gins(a, &n3, &n1);
			gins(a, &n3, &n1);
		} else
			gins(a, nr, &n1);
		gmove(&n1, res);
		regfree(&n1);
		goto ret;
	}

	// Spill either operand containing a function call to a temporary.
	if(nl->ullman >= UINF) {
		tempname(&n4, nl->type);
		cgen(nl, &n4);
		nl = &n4;
	}
	if(nr->ullman >= UINF) {
		tempname(&n5, nr->type);
		cgen(nr, &n5);
		nr = &n5;
	}

	rcx = reg[D_CX];
	nodreg(&n1, types[TUINT32], D_CX);

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount = types[simtype[nr->type->etype]];
	if(tcount->etype < TUINT32)
		tcount = types[TUINT32];

	regalloc(&n1, nr->type, &n1);	// to hold the shift type in CX
	regalloc(&n3, tcount, &n1);	// to clear high bits of CX

	// Save CX if it is live and not the destination.
	nodreg(&cx, types[TUINT64], D_CX);
	memset(&oldcx, 0, sizeof oldcx);
	if(rcx > 0 && !samereg(&cx, res)) {
		regalloc(&oldcx, types[TUINT64], N);
		gmove(&cx, &oldcx);
	}
	cx.type = tcount;

	if(samereg(&cx, res))
		regalloc(&n2, nl->type, N);
	else
		regalloc(&n2, nl->type, res);
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &n2);
		cgen(nr, &n1);
		gmove(&n1, &n3);
	} else {
		cgen(nr, &n1);
		gmove(&n1, &n3);
		cgen(nl, &n2);
	}
	regfree(&n3);

	// test and fix up large shifts
	if(!bounded) {
		nodconst(&n3, tcount, nl->type->width*8);
		gins(optoas(OCMP, tcount), &n1, &n3);
		p1 = gbranch(optoas(OLT, tcount), T, +1);
		if(op == ORSH && issigned[nl->type->etype]) {
			// Signed right shift by >= width: shift by width-1
			// (propagates the sign bit).
			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
			gins(a, &n3, &n2);
		} else {
			// Unsigned or left shift by >= width: result is 0.
			nodconst(&n3, nl->type, 0);
			gmove(&n3, &n2);
		}
		patch(p1, pc);
	}

	gins(a, &n1, &n2);

	// Restore saved CX, if any.
	if(oldcx.op != 0) {
		cx.type = types[TUINT64];
		gmove(&oldcx, &cx);
		regfree(&oldcx);
	}

	gmove(&n2, res);

	regfree(&n1);
	regfree(&n2);

ret:
	;
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
void
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n1b, n2b, *tmp;
	Type *t;
	int a;

	// largest ullman on left.
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}

	// generate operands in "8-bit" registers.
	regalloc(&n1b, nl->type, res);
	cgen(nl, &n1b);
	regalloc(&n2b, nr->type, N);
	cgen(nr, &n2b);

	// perform full-width multiplication.
	t = types[TUINT64];
	if(issigned[nl->type->etype])
		t = types[TINT64];
	// Re-view the byte registers as 64-bit registers for the multiply.
	nodreg(&n1, t, n1b.val.u.reg);
	nodreg(&n2, t, n2b.val.u.reg);
	a = optoas(op, t);
	gins(a, &n2, &n1);

	// truncate.
	gmove(&n1, res);
	regfree(&n1b);
	regfree(&n2b);
}

// clearfat zeroes a multi-word ("fat") object in place, using
// component stores, STOSQ/STOSB, or runtime.duffzero depending on size.
void
clearfat(Node *nl)
{
	int64 w, c, q;
	Node n1, oldn1, ax, oldax, di, z;
	Prog *p;

	/* clear a fat object */
	if(debug['g'])
		dump("\nclearfat", nl);

	w = nl->type->width;
	// Avoid taking the address for simple enough types.
	if(componentgen(N, nl))
		return;

	c = w % 8;	// bytes
	q = w / 8;	// quads

	// DI = address of object, AX = 0 (STOS source).
	savex(D_DI, &n1, &oldn1, N, types[tptr]);
	agen(nl, &n1);

	savex(D_AX, &ax, &oldax, N, types[tptr]);
	gconreg(AMOVL, 0, D_AX);

	if(q > 128 || (q >= 4 && nacl)) {
		gconreg(movptr, q, D_CX);
		gins(AREP, N, N);	// repeat
		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
	} else if(q >= 4) {
		p = gins(ADUFFZERO, N, N);
		p->to.type = D_ADDR;
		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
		// 2 and 128 = magic constants: see ../../pkg/runtime/asm_amd64.s
		p->to.offset = 2*(128-q);
	} else
	while(q > 0) {
		gins(ASTOSQ, N, N);	// STOQ AL,*(DI)+
		q--;
	}

	// Zero the trailing c bytes with (possibly overlapping) direct stores
	// where a single MOVQ/MOVL pair suffices, else byte-wise STOSB.
	z = ax;
	di = n1;
	if(w >= 8 && c >= 4) {
		di.op = OINDREG;
		di.type = z.type = types[TINT64];
		p = gins(AMOVQ, &z, &di);
		p->to.scale = 1;
		p->to.offset = c-8;
	} else if(c >= 4) {
		di.op = OINDREG;
		di.type = z.type = types[TINT32];
		p = gins(AMOVL, &z, &di);
		if(c > 4) {
			p = gins(AMOVL, &z, &di);
			p->to.scale = 1;
			p->to.offset = c-4;
		}
	} else
	while(c > 0) {
		gins(ASTOSB, N, N);	// STOB AL,*(DI)+
		c--;
	}

	restx(&n1, &oldn1);
	restx(&ax, &oldax);
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
void
expandchecks(Prog *firstp)
{
	Prog *p, *p1, *p2;

	for(p = firstp; p != P; p = p->link) {
		if(p->as != ACHECKNIL)
			continue;
		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
			warnl(p->lineno, "generated nil check");
		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = mal(sizeof *p1);
		p2 = mal(sizeof *p2);
		clearp(p1);
		clearp(p2);
		// Splice p1 and p2 in after p: p -> p1 -> p2 -> old p->link.
		p1->link = p2;
		p2->link = p->link;
		p->link = p1;
		p1->lineno = p->lineno;
		p2->lineno = p->lineno;
		p1->pc = 9999;
		p2->pc = 9999;
		// Rewrite p itself into the CMP.
		p->as = cmpptr;
		p->to.type = D_CONST;
		p->to.offset = 0;
		p1->as = AJNE;
		p1->from.type = D_CONST;
		p1->from.offset = 1; // likely
		p1->to.type = D_BRANCH;
		p1->to.u.branch = p2->link;
		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2->as = AMOVL;
		p2->from.type = D_AX;
		if(regtyp(&p->from))
			p2->to.type = p->from.type + D_INDIR;
		else
			p2->to.type = D_INDIR+D_NONE;
		p2->to.offset = 0;
	}
}