github.com/shijuvar/go@v0.0.0-20141209052335-e8f13700b70c/src/cmd/6g/ggen.c

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#undef	EXTERN
#define	EXTERN
#include <u.h>
#include <libc.h>
#include "gg.h"
#include "opt.h"

static Prog *appendpp(Prog*, int, int, vlong, int, vlong);
static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);

void
defframe(Prog *ptxt)
{
	uint32 frame, ax;
	Prog *p;
	vlong hi, lo;
	NodeList *l;
	Node *n;

	// fill in argument size
	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);

	// fill in final stack size
	ptxt->to.offset <<= 32;
	frame = rnd(stksize+maxarg, widthreg);
	ptxt->to.offset |= frame;

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p = ptxt;
	lo = hi = 0;
	ax = 0;
	// iterate through declarations - they are sorted in decreasing xoffset order.
	for(l=curfn->dcl; l != nil; l = l->next) {
		n = l->n;
		if(!n->needzero)
			continue;
		if(n->class != PAUTO)
			fatal("needzero class %d", n->class);
		if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
			fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);

		if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) {
			// merge with range we already have
			lo = n->xoffset;
			continue;
		}
		// zero old range
		p = zerorange(p, frame, lo, hi, &ax);

		// set new range
		hi = n->xoffset + n->type->width;
		lo = n->xoffset;
	}
	// zero final range
	zerorange(p, frame, lo, hi, &ax);
}

static Prog*
zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax)
{
	vlong cnt, i;

	cnt = hi - lo;
	if(cnt == 0)
		return p;
	if(*ax == 0) {
		p = appendpp(p, AMOVQ, D_CONST, 0, D_AX, 0);
		*ax = 1;
	}
	if(cnt % widthreg != 0) {
		// should only happen with nacl
		if(cnt % widthptr != 0)
			fatal("zerorange count not a multiple of widthptr %lld", cnt);
		p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo);
		lo += widthptr;
		cnt -= widthptr;
	}
	if(cnt <= 4*widthreg) {
		for(i = 0; i < cnt; i += widthreg) {
			p = appendpp(p, AMOVQ, D_AX, 0, D_SP+D_INDIR, frame+lo+i);
		}
	} else if(!nacl && (cnt <= 128*widthreg)) {
		p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0);
		p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 2*(128-cnt/widthreg));
		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
	} else {
		p = appendpp(p, AMOVQ, D_CONST, cnt/widthreg, D_CX, 0);
		p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0);
		p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
		p = appendpp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
	}
	return p;
}

static Prog*
appendpp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
{
	Prog *q;
	q = mal(sizeof(*q));
	clearp(q);
	q->as = as;
	q->lineno = p->lineno;
	q->from.type = ftype;
	q->from.offset = foffset;
	q->to.type = ttype;
	q->to.offset = toffset;
	q->link = p->link;
	p->link = q;
	return q;
}
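
// Roughly, for a run of cnt bytes at frame+lo (widthreg is 8 here),
// zerorange emits one of three sequences, trading code size against
// the setup cost of DI and CX. A sketch of the generated code, read
// off the cases above rather than from verified output:
//
//	cnt <= 4*widthreg:		MOVQ AX, frame+lo+i(SP), one per word
//	cnt <= 128*widthreg, !nacl:	LEAQ frame+lo(SP), DI
//					DUFFZERO $2*(128-cnt/widthreg)
//	otherwise:			MOVQ $cnt/widthreg, CX
//					LEAQ frame+lo(SP), DI
//					REP; STOSQ
//
// The DUFFZERO offset jumps partway into runtime·duffzero so that only
// the needed stores execute; the 2 and 128 constants match
// ../../runtime/asm_amd64.s (clearfat below uses the same pair).
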
// Sweep the prog list to mark any used nodes.
void
markautoused(Prog* p)
{
	for (; p; p = p->link) {
		if (p->as == ATYPE || p->as == AVARDEF || p->as == AVARKILL)
			continue;

		if (p->from.node)
			p->from.node->used = 1;

		if (p->to.node)
			p->to.node->used = 1;
	}
}

// Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
void
fixautoused(Prog *p)
{
	Prog **lp;

	for (lp=&p; (p=*lp) != P; ) {
		if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
			*lp = p->link;
			continue;
		}
		if ((p->as == AVARDEF || p->as == AVARKILL) && p->to.node && !p->to.node->used) {
			// Cannot remove VARDEF instruction, because - unlike TYPE handled above -
			// VARDEFs are interspersed with other code, and a jump might be using the
			// VARDEF as a target. Replace with a no-op instead. A later pass will remove
			// the no-ops.
			p->to.type = D_NONE;
			p->to.node = N;
			p->as = ANOP;
			continue;
		}
		if (p->from.type == D_AUTO && p->from.node)
			p->from.offset += p->from.node->stkdelta;

		if (p->to.type == D_AUTO && p->to.node)
			p->to.offset += p->to.node->stkdelta;

		lp = &p->link;
	}
}
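
// markautoused and fixautoused bracket the stack-slot allocator: the
// intended call order (see allocauto in ../gc/pgen.c) is to mark which
// autos the code really references, let the allocator assign final
// frame offsets while recording each node's move in stkdelta, and then
// rewrite the instruction stream here against the final offsets.
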
/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 */
void
ginscall(Node *f, int proc)
{
	Prog *p;
	Node reg, con;
	Node r1;

	if(f->type != T)
		setmaxarg(f->type);

	switch(proc) {
	default:
		fatal("ginscall: bad proc %d", proc);
		break;

	case 0:	// normal call
	case -1:	// normal call but no return
		if(f->op == ONAME && f->class == PFUNC) {
			if(f == deferreturn) {
				// Deferred calls will appear to be returning to
				// the CALL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction byte before the return PC.
				// To avoid that being an unrelated instruction,
				// insert an x86 NOP so that we will have the right line number.
				// x86 NOP 0x90 is really XCHG AX, AX; use that description
				// because the NOP pseudo-instruction would be removed by
				// the linker.
				nodreg(&reg, types[TINT], D_AX);
				gins(AXCHGL, &reg, &reg);
			}
			p = gins(ACALL, N, f);
			afunclit(&p->to, f);
			if(proc == -1 || noreturn(p))
				gins(AUNDEF, N, N);
			break;
		}
		nodreg(&reg, types[tptr], D_DX);
		nodreg(&r1, types[tptr], D_BX);
		gmove(f, &reg);
		reg.op = OINDREG;
		gmove(&reg, &r1);
		reg.op = OREGISTER;
		gins(ACALL, &reg, &r1);
		break;

	case 3:	// normal call of c function pointer
		gins(ACALL, N, f);
		break;

	case 1:	// call in new proc (go)
	case 2:	// deferred call (defer)
		nodconst(&con, types[TINT64], argsize(f->type));
		if(widthptr == 4) {
			nodreg(&r1, types[TINT32], D_CX);
			gmove(f, &r1);
			nodreg(&reg, types[TINT64], D_CX);
			nodconst(&r1, types[TINT64], 32);
			gins(ASHLQ, &r1, &reg);
			gins(AORQ, &con, &reg);
			gins(APUSHQ, &reg, N);
		} else {
			nodreg(&reg, types[TINT64], D_CX);
			gmove(f, &reg);
			gins(APUSHQ, &reg, N);
			gins(APUSHQ, &con, N);
		}
		if(proc == 1)
			ginscall(newproc, 0);
		else {
			if(!hasdefer)
				fatal("hasdefer=0 but has defer");
			ginscall(deferproc, 0);
		}
		nodreg(&reg, types[TINT64], D_CX);
		gins(APOPQ, N, &reg);
		if(widthptr == 8)
			gins(APOPQ, N, &reg);
		if(proc == 2) {
			nodreg(&reg, types[TINT64], D_AX);
			gins(ATESTQ, &reg, &reg);
			p = gbranch(AJEQ, T, +1);
			cgen_ret(N);
			patch(p, pc);
		}
		break;
	}
}

/*
 * n is call to interface method.
 * generate res = n.
 */
void
cgen_callinter(Node *n, Node *res, int proc)
{
	Node *i, *f;
	Node tmpi, nodi, nodo, nodr, nodsp;

	i = n->left;
	if(i->op != ODOTINTER)
		fatal("cgen_callinter: not ODOTINTER %O", i->op);

	f = i->right;		// field
	if(f->op != ONAME)
		fatal("cgen_callinter: not ONAME %O", f->op);

	i = i->left;		// interface

	if(!i->addable) {
		tempname(&tmpi, i->type);
		cgen(i, &tmpi);
		i = &tmpi;
	}

	genlist(n->list);		// assign the args

	// i is now addable, prepare an indirected
	// register to hold its address.
	igen(i, &nodi, res);		// REG = &inter

	nodindreg(&nodsp, types[tptr], D_SP);
	nodi.type = types[tptr];
	nodi.xoffset += widthptr;
	cgen(&nodi, &nodsp);	// 0(SP) = 8(REG) -- i.data

	regalloc(&nodo, types[tptr], res);
	nodi.type = types[tptr];
	nodi.xoffset -= widthptr;
	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
	regfree(&nodi);

	regalloc(&nodr, types[tptr], &nodo);
	if(n->left->xoffset == BADWIDTH)
		fatal("cgen_callinter: badwidth");
	cgen_checknil(&nodo);	// in case offset is huge
	nodo.op = OINDREG;
	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
	if(proc == 0) {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
		proc = 3;
	} else {
		// go/defer. generate go func value.
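		// A Go func value is a pointer to a word holding the code
		// pointer, and i.tab->fun[f] is such a word, so the address
		// formed by the LEAQ below is itself a valid func value for
		// newproc/deferproc to call later.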
		gins(ALEAQ, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
	}

	nodr.type = n->left->type;
	ginscall(&nodr, proc);

	regfree(&nodr);
	regfree(&nodo);
}

/*
 * generate function call;
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 */
void
cgen_call(Node *n, int proc)
{
	Type *t;
	Node nod, afun;

	if(n == N)
		return;

	if(n->left->ullman >= UINF) {
		// if name involves a fn call
		// precompute the address of the fn
		tempname(&afun, types[tptr]);
		cgen(n->left, &afun);
	}

	genlist(n->list);		// assign the args
	t = n->left->type;

	// call tempname pointer
	if(n->left->ullman >= UINF) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, &afun);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call pointer
	if(n->left->op != ONAME || n->left->class != PFUNC) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, n->left);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call direct
	n->left->method = 1;
	ginscall(n->left, proc);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = return value from call.
 */
void
cgen_callret(Node *n, Node *res)
{
	Node nod;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(t->etype == TPTR32 || t->etype == TPTR64)
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_callret: nil");

	memset(&nod, 0, sizeof(nod));
	nod.op = OINDREG;
	nod.val.u.reg = D_SP;
	nod.addable = 1;

	nod.xoffset = fp->width;
	nod.type = fp->type;
	cgen_as(res, &nod);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = &return value from call.
 */
void
cgen_aret(Node *n, Node *res)
{
	Node nod1, nod2;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(isptr[t->etype])
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_aret: nil");

	memset(&nod1, 0, sizeof(nod1));
	nod1.op = OINDREG;
	nod1.val.u.reg = D_SP;
	nod1.addable = 1;

	nod1.xoffset = fp->width;
	nod1.type = fp->type;

	if(res->op != OREGISTER) {
		regalloc(&nod2, types[tptr], res);
		gins(leaptr, &nod1, &nod2);
		gins(movptr, &nod2, res);
		regfree(&nod2);
	} else
		gins(leaptr, &nod1, res);
}

/*
 * generate return.
 * n->left is assignments to return values.
 */
void
cgen_ret(Node *n)
{
	Prog *p;

	if(n != N)
		genlist(n->list);		// copy out args
	if(hasdefer)
		ginscall(deferreturn, 0);
	genlist(curfn->exit);
	p = gins(ARET, N, N);
	if(n != N && n->op == ORETJMP) {
		p->to.type = D_EXTERN;
		p->to.sym = linksym(n->left->sym);
	}
}
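
// Note on cgen_callret and cgen_aret above: results live in the
// caller's argument area, after the arguments, so once the call has
// returned, the first result is simply fp->width(SP); cgen_callret
// copies it out, while cgen_aret takes its address instead.
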
/*
 * generate += *= etc.
 */
void
cgen_asop(Node *n)
{
	Node n1, n2, n3, n4;
	Node *nl, *nr;
	Prog *p1;
	Addr addr;
	int a;

	nl = n->left;
	nr = n->right;

	if(nr->ullman >= UINF && nl->ullman >= UINF) {
		tempname(&n1, nr->type);
		cgen(nr, &n1);
		n2 = *n;
		n2.right = &n1;
		cgen_asop(&n2);
		goto ret;
	}

	if(!isint[nl->type->etype])
		goto hard;
	if(!isint[nr->type->etype])
		goto hard;

	switch(n->etype) {
	case OADD:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(OINC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;

	case OSUB:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(ODEC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;
	}

	switch(n->etype) {
	case OADD:
	case OSUB:
	case OXOR:
	case OAND:
	case OOR:
		a = optoas(n->etype, nl->type);
		if(nl->addable) {
			if(smallintconst(nr)) {
				gins(a, nr, nl);
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			gins(a, &n2, nl);
			regfree(&n2);
			goto ret;
		}
		if(nr->ullman < UINF)
		if(sudoaddable(a, nl, &addr)) {
			if(smallintconst(nr)) {
				p1 = gins(a, nr, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			p1 = gins(a, &n2, N);
			p1->to = addr;
			regfree(&n2);
			sudoclean();
			goto ret;
		}
	}

hard:
	n2.op = 0;
	n1.op = 0;
	if(nr->op == OLITERAL) {
		// don't allocate a register for literals.
	} else if(nr->ullman >= nl->ullman || nl->addable) {
		regalloc(&n2, nr->type, N);
		cgen(nr, &n2);
		nr = &n2;
	} else {
		tempname(&n2, nr->type);
		cgen(nr, &n2);
		nr = &n2;
	}
	if(!nl->addable) {
		igen(nl, &n1, N);
		nl = &n1;
	}

	n3 = *n;
	n3.left = nl;
	n3.right = nr;
	n3.op = n->etype;

	regalloc(&n4, nl->type, N);
	cgen(&n3, &n4);
	gmove(&n4, nl);

	if(n1.op)
		regfree(&n1);
	if(n2.op == OREGISTER)
		regfree(&n2);
	regfree(&n4);

ret:
	;
}

int
samereg(Node *a, Node *b)
{
	if(a == N || b == N)
		return 0;
	if(a->op != OREGISTER)
		return 0;
	if(b->op != OREGISTER)
		return 0;
	if(a->val.u.reg != b->val.u.reg)
		return 0;
	return 1;
}
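
// A concrete case of the trap dodiv guards against below: on amd64,
// IDIV raises #DE when the quotient overflows, and the quotient of
// (-1<<63) / -1 is 1<<63, which does not fit in int64, so the
// instruction would fault rather than wrap. The check emitted below
// rewrites a/(-1) as -a and a%(-1) as 0 before the divide runs.
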
/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
void
dodiv(int op, Node *nl, Node *nr, Node *res)
{
	int a, check;
	Node n3, n4;
	Type *t, *t0;
	Node ax, dx, ax1, n31, oldax, olddx;
	Prog *p1, *p2;

	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t = nl->type;
	t0 = t;
	check = 0;
	if(issigned[t->etype]) {
		check = 1;
		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
			check = 0;
		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
			check = 0;
	}
	if(t->width < 4) {
		if(issigned[t->etype])
			t = types[TINT32];
		else
			t = types[TUINT32];
		check = 0;
	}
	a = optoas(op, t);

	regalloc(&n3, t0, N);
	if(nl->ullman >= nr->ullman) {
		savex(D_AX, &ax, &oldax, res, t0);
		cgen(nl, &ax);
		regalloc(&ax, t0, &ax);	// mark ax live during cgen
		cgen(nr, &n3);
		regfree(&ax);
	} else {
		cgen(nr, &n3);
		savex(D_AX, &ax, &oldax, res, t0);
		cgen(nl, &ax);
	}
	if(t != t0) {
		// Convert
		ax1 = ax;
		n31 = n3;
		ax.type = t;
		n3.type = t;
		gmove(&ax1, &ax);
		gmove(&n31, &n3);
	}

	p2 = P;
	if(nacl) {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		nodconst(&n4, t, 0);
		gins(optoas(OCMP, t), &n3, &n4);
		p1 = gbranch(optoas(ONE, t), T, +1);
		if(panicdiv == N)
			panicdiv = sysfunc("panicdivide");
		ginscall(panicdiv, -1);
		patch(p1, pc);
	}
	if(check) {
		nodconst(&n4, t, -1);
		gins(optoas(OCMP, t), &n3, &n4);
		p1 = gbranch(optoas(ONE, t), T, +1);
		if(op == ODIV) {
			// a / (-1) is -a.
			gins(optoas(OMINUS, t), N, &ax);
			gmove(&ax, res);
		} else {
			// a % (-1) is 0.
			nodconst(&n4, t, 0);
			gmove(&n4, res);
		}
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
	}
	savex(D_DX, &dx, &olddx, res, t);
	if(!issigned[t->etype]) {
		nodconst(&n4, t, 0);
		gmove(&n4, &dx);
	} else
		gins(optoas(OEXTEND, t), N, N);
	gins(a, &n3, N);
	regfree(&n3);
	if(op == ODIV)
		gmove(&ax, res);
	else
		gmove(&dx, res);
	restx(&dx, &olddx);
	if(check)
		patch(p2, pc);
	restx(&ax, &oldax);
}

/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it. if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register. caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
void
savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
{
	int r;

	r = reg[dr];

	// save current ax and dx if they are live
	// and not the destination
	memset(oldx, 0, sizeof *oldx);
	nodreg(x, t, dr);
	if(r > 1 && !samereg(x, res)) {
		regalloc(oldx, types[TINT64], N);
		x->type = types[TINT64];
		gmove(x, oldx);
		x->type = t;
		oldx->ostk = r;	// squirrel away old r value
		reg[dr] = 1;
	}
}

void
restx(Node *x, Node *oldx)
{
	if(oldx->op != 0) {
		x->type = types[TINT64];
		reg[x->val.u.reg] = oldx->ostk;
		gmove(oldx, x);
		regfree(oldx);
	}
}
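
// Worked example of the multiply-high trick cgen_div applies below
// (unsigned, w=64, d=5): umagic yields m = 0xCCCCCCCCCCCCCCCD with
// s = 2 and no overflow fixup, so n/5 becomes ((n*m) >> 64) >> 2,
// one HMUL and one shift in place of a DIVQ. (A sketch only; the
// exact constants come from umagic/smagic.)
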
/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
void
cgen_div(int op, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n3;
	int w, a;
	Magic m;

	if(nr->op != OLITERAL)
		goto longdiv;
	w = nl->type->width*8;

	// Front end handled 32-bit division. We only need to handle 64-bit.
	// try to do division by multiply by (2^w)/d
	// see Hacker's Delight, chapter 10
	switch(simtype[nl->type->etype]) {
	default:
		goto longdiv;

	case TUINT64:
		m.w = w;
		m.ud = mpgetfix(nr->val.u.xval);
		umagic(&m);
		if(m.bad)
			break;
		if(op == OMOD)
			goto longmod;

		cgenr(nl, &n1, N);
		nodconst(&n2, nl->type, m.um);
		regalloc(&n3, nl->type, res);
		cgen_hmul(&n1, &n2, &n3);

		if(m.ua) {
			// need to add numerator accounting for overflow
			gins(optoas(OADD, nl->type), &n1, &n3);
			nodconst(&n2, nl->type, 1);
			gins(optoas(ORROTC, nl->type), &n2, &n3);
			nodconst(&n2, nl->type, m.s-1);
			gins(optoas(ORSH, nl->type), &n2, &n3);
		} else {
			nodconst(&n2, nl->type, m.s);
			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift dx
		}

		gmove(&n3, res);
		regfree(&n1);
		regfree(&n3);
		return;

	case TINT64:
		m.w = w;
		m.sd = mpgetfix(nr->val.u.xval);
		smagic(&m);
		if(m.bad)
			break;
		if(op == OMOD)
			goto longmod;

		cgenr(nl, &n1, res);
		nodconst(&n2, nl->type, m.sm);
		regalloc(&n3, nl->type, N);
		cgen_hmul(&n1, &n2, &n3);

		if(m.sm < 0) {
			// need to add numerator
			gins(optoas(OADD, nl->type), &n1, &n3);
		}

		nodconst(&n2, nl->type, m.s);
		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3

		nodconst(&n2, nl->type, w-1);
		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
		gins(optoas(OSUB, nl->type), &n1, &n3);	// add 1 iff num is neg

		if(m.sd < 0) {
			// this could probably be removed
			// by factoring it into the multiplier
			gins(optoas(OMINUS, nl->type), N, &n3);
		}

		gmove(&n3, res);
		regfree(&n1);
		regfree(&n3);
		return;
	}
	goto longdiv;

longdiv:
	// division and mod using (slow) hardware instruction
	dodiv(op, nl, nr, res);
	return;

longmod:
	// mod using formula A%B = A-(A/B*B) but
	// we know that there is a fast algorithm for A/B
	regalloc(&n1, nl->type, res);
	cgen(nl, &n1);
	regalloc(&n2, nl->type, N);
	cgen_div(ODIV, &n1, nr, &n2);
	a = optoas(OMUL, nl->type);
	if(w == 8) {
		// use 2-operand 16-bit multiply
		// because there is no 2-operand 8-bit multiply
		a = AIMULW;
	}
	if(!smallintconst(nr)) {
		regalloc(&n3, nl->type, N);
		cgen(nr, &n3);
		gins(a, &n3, &n2);
		regfree(&n3);
	} else
		gins(a, nr, &n2);
	gins(optoas(OSUB, nl->type), &n2, &n1);
	gmove(&n1, res);
	regfree(&n1);
	regfree(&n2);
}

/*
 * generate high multiply:
 *	res = (nl*nr) >> width
 */
void
cgen_hmul(Node *nl, Node *nr, Node *res)
{
	Type *t;
	int a;
	Node n1, n2, ax, dx, *tmp;

	t = nl->type;
	a = optoas(OHMUL, t);
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}
	cgenr(nl, &n1, res);
	cgenr(nr, &n2, N);
	nodreg(&ax, t, D_AX);
	gmove(&n1, &ax);
	gins(a, &n2, N);
	regfree(&n2);
	regfree(&n1);

	if(t->width == 1) {
		// byte multiply behaves differently.
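		// An 8-bit MUL/IMUL leaves the full 16-bit product in AX,
		// so the high half sits in AH rather than DX; move it to DX
		// here so the common path below can read the result from DX.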
		nodreg(&ax, t, D_AH);
		nodreg(&dx, t, D_DX);
		gmove(&ax, &dx);
	}
	nodreg(&dx, t, D_DX);
	gmove(&dx, res);
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
void
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n3, n4, n5, cx, oldcx;
	int a, rcx;
	Prog *p1;
	uvlong sc;
	Type *tcount;

	a = optoas(op, nl->type);

	if(nr->op == OLITERAL) {
		regalloc(&n1, nl->type, res);
		cgen(nl, &n1);
		sc = mpgetfix(nr->val.u.xval);
		if(sc >= nl->type->width*8) {
			// large shift gets 2 shifts by width-1
			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
			gins(a, &n3, &n1);
			gins(a, &n3, &n1);
		} else
			gins(a, nr, &n1);
		gmove(&n1, res);
		regfree(&n1);
		goto ret;
	}

	if(nl->ullman >= UINF) {
		tempname(&n4, nl->type);
		cgen(nl, &n4);
		nl = &n4;
	}
	if(nr->ullman >= UINF) {
		tempname(&n5, nr->type);
		cgen(nr, &n5);
		nr = &n5;
	}

	rcx = reg[D_CX];
	nodreg(&n1, types[TUINT32], D_CX);

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount = types[simtype[nr->type->etype]];
	if(tcount->etype < TUINT32)
		tcount = types[TUINT32];

	regalloc(&n1, nr->type, &n1);	// to hold the shift type in CX
	regalloc(&n3, tcount, &n1);	// to clear high bits of CX

	nodreg(&cx, types[TUINT64], D_CX);
	memset(&oldcx, 0, sizeof oldcx);
	if(rcx > 0 && !samereg(&cx, res)) {
		regalloc(&oldcx, types[TUINT64], N);
		gmove(&cx, &oldcx);
	}
	cx.type = tcount;

	if(samereg(&cx, res))
		regalloc(&n2, nl->type, N);
	else
		regalloc(&n2, nl->type, res);
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &n2);
		cgen(nr, &n1);
		gmove(&n1, &n3);
	} else {
		cgen(nr, &n1);
		gmove(&n1, &n3);
		cgen(nl, &n2);
	}
	regfree(&n3);

	// test and fix up large shifts
	if(!bounded) {
		nodconst(&n3, tcount, nl->type->width*8);
		gins(optoas(OCMP, tcount), &n1, &n3);
		p1 = gbranch(optoas(OLT, tcount), T, +1);
		if(op == ORSH && issigned[nl->type->etype]) {
			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
			gins(a, &n3, &n2);
		} else {
			nodconst(&n3, nl->type, 0);
			gmove(&n3, &n2);
		}
		patch(p1, pc);
	}

	gins(a, &n1, &n2);

	if(oldcx.op != 0) {
		cx.type = types[TUINT64];
		gmove(&oldcx, &cx);
		regfree(&oldcx);
	}

	gmove(&n2, res);

	regfree(&n1);
	regfree(&n2);

ret:
	;
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
void
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, n1b, n2b, *tmp;
	Type *t;
	int a;

	// largest ullman on left.
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}

	// generate operands in "8-bit" registers.
	regalloc(&n1b, nl->type, res);
	cgen(nl, &n1b);
	regalloc(&n2b, nr->type, N);
	cgen(nr, &n2b);

	// perform full-width multiplication.
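	// Reuse the byte operands' registers at 64-bit width so that a
	// real 2-operand multiply (IMULQ) applies; whatever lands in the
	// high bits is harmless because only the low byte survives the
	// truncating gmove below.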
	t = types[TUINT64];
	if(issigned[nl->type->etype])
		t = types[TINT64];
	nodreg(&n1, t, n1b.val.u.reg);
	nodreg(&n2, t, n2b.val.u.reg);
	a = optoas(op, t);
	gins(a, &n2, &n1);

	// truncate.
	gmove(&n1, res);
	regfree(&n1b);
	regfree(&n2b);
}

void
clearfat(Node *nl)
{
	int64 w, c, q;
	Node n1, oldn1, ax, oldax, di, z;
	Prog *p;

	/* clear a fat object */
	if(debug['g'])
		dump("\nclearfat", nl);

	w = nl->type->width;
	// Avoid taking the address for simple enough types.
	if(componentgen(N, nl))
		return;

	c = w % 8;	// bytes
	q = w / 8;	// quads

	if(q < 4) {
		// Write sequence of MOV 0, off(base) instead of using STOSQ.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSQ loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		agenr(nl, &n1, N);
		n1.op = OINDREG;
		nodconst(&z, types[TUINT64], 0);
		while(q-- > 0) {
			n1.type = z.type;
			gins(AMOVQ, &z, &n1);
			n1.xoffset += 8;
		}
		if(c >= 4) {
			nodconst(&z, types[TUINT32], 0);
			n1.type = z.type;
			gins(AMOVL, &z, &n1);
			n1.xoffset += 4;
			c -= 4;
		}
		nodconst(&z, types[TUINT8], 0);
		while(c-- > 0) {
			n1.type = z.type;
			gins(AMOVB, &z, &n1);
			n1.xoffset++;
		}
		regfree(&n1);
		return;
	}

	savex(D_DI, &n1, &oldn1, N, types[tptr]);
	agen(nl, &n1);

	savex(D_AX, &ax, &oldax, N, types[tptr]);
	gconreg(AMOVL, 0, D_AX);

	if(q > 128 || nacl) {
		gconreg(movptr, q, D_CX);
		gins(AREP, N, N);	// repeat
		gins(ASTOSQ, N, N);	// STOSQ: store RAX at (DI), advance DI
	} else {
		p = gins(ADUFFZERO, N, N);
		p->to.type = D_ADDR;
		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
		// 2 and 128 = magic constants: see ../../runtime/asm_amd64.s
		p->to.offset = 2*(128-q);
	}

	z = ax;
	di = n1;
	if(w >= 8 && c >= 4) {
		di.op = OINDREG;
		di.type = z.type = types[TINT64];
		p = gins(AMOVQ, &z, &di);
		p->to.scale = 1;
		p->to.offset = c-8;
	} else if(c >= 4) {
		di.op = OINDREG;
		di.type = z.type = types[TINT32];
		p = gins(AMOVL, &z, &di);
		if(c > 4) {
			p = gins(AMOVL, &z, &di);
			p->to.scale = 1;
			p->to.offset = c-4;
		}
	} else
	while(c > 0) {
		gins(ASTOSB, N, N);	// STOSB: store AL at (DI), advance DI
		c--;
	}

	restx(&n1, &oldn1);
	restx(&ax, &oldax);
}
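
// The stores at to.offset c-8 and c-4 in clearfat above are the usual
// overlapping-tail trick: once the quads have been cleared, the final
// c bytes are finished with a single wider store that ends exactly at
// the end of the object, re-zeroing a few already-cleared bytes
// rather than looping one byte at a time.
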
// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
void
expandchecks(Prog *firstp)
{
	Prog *p, *p1, *p2;

	for(p = firstp; p != P; p = p->link) {
		if(p->as != ACHECKNIL)
			continue;
		if(debug_checknil && p->lineno > 1)	// p->lineno==1 in generated wrappers
			warnl(p->lineno, "generated nil check");
		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = mal(sizeof *p1);
		p2 = mal(sizeof *p2);
		clearp(p1);
		clearp(p2);
		p1->link = p2;
		p2->link = p->link;
		p->link = p1;
		p1->lineno = p->lineno;
		p2->lineno = p->lineno;
		p1->pc = 9999;
		p2->pc = 9999;
		p->as = cmpptr;
		p->to.type = D_CONST;
		p->to.offset = 0;
		p1->as = AJNE;
		p1->from.type = D_CONST;
		p1->from.offset = 1;	// likely
		p1->to.type = D_BRANCH;
		p1->to.u.branch = p2->link;
		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2->as = AMOVL;
		p2->from.type = D_AX;
		if(regtyp(&p->from))
			p2->to.type = p->from.type + D_INDIR;
		else
			p2->to.type = D_INDIR+D_NONE;
		p2->to.offset = 0;
	}
}
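
// For instance, a CHECKNIL of BX expands to (a sketch, following the
// rewrite above):
//	CMPQ	BX, $0
//	JNE	2(PC)		// from.offset=1 marks the branch likely
//	MOVL	AX, (BX)	// faults; BX is known to be zero here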