github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/cmd/compile/internal/amd64/ggen.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

func defframe(ptxt *obj.Prog) {
	var n *gc.Node

	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)
	x0 := uint32(0)

	// iterate through declarations - they are sorted in decreasing xoffset order.
	for l := gc.Curfn.Func.Dcl; l != nil; l = l.Next {
		n = l.N
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatalf("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatalf("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}

		if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthreg) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax, &x0)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax, &x0)
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD;
// see runtime/mkduff.go.
const (
	dzBlocks    = 16 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 19 // size of instructions in a single block
	dzMovSize   = 4  // size of single MOV instruction w/ offset
	dzAddSize   = 4  // size of single ADD instruction
	dzClearStep = 16 // number of bytes cleared by each MOV instruction

	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
	dzSize     = dzBlocks * dzBlockSize
)

// dzOff returns the offset for a jump into DUFFZERO.
// b is the number of bytes to zero.
func dzOff(b int64) int64 {
	off := int64(dzSize)
	off -= b / dzClearLen * dzBlockSize
	tailLen := b % dzClearLen
	if tailLen >= dzClearStep {
		off -= dzAddSize + dzMovSize*(tailLen/dzClearStep)
	}
	return off
}
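
// Worked example (editor's note, assuming the MOVUPS/ADD block layout of
// runtime·duffzero described by the constants above): for b = 48 bytes,
// b/dzClearLen = 0 full blocks and tailLen = 48, so
// dzOff(48) = dzSize - (dzAddSize + 3*dzMovSize) = 304 - 16 = 288,
// which enters the last 19-byte block just past its first, offset-free
// MOVUPS. dzDI (below) returns -dzClearStep*(dzBlockLen-3) = -16, so once
// the caller pre-adjusts DI, the three remaining stores at 16(DI), 32(DI),
// and 48(DI) cover exactly the 48 bytes starting at the target address.
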
// dzDI returns the pre-adjustment to DI for a call to DUFFZERO.
// b is the number of bytes to zero.
func dzDI(b int64) int64 {
	tailLen := b % dzClearLen
	if tailLen < dzClearStep {
		return 0
	}
	tailSteps := tailLen / dzClearStep
	return -dzClearStep * (dzBlockLen - tailSteps)
}

func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32, x0 *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}

	if cnt%int64(gc.Widthreg) != 0 {
		// should only happen with nacl
		if cnt%int64(gc.Widthptr) != 0 {
			gc.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
		}
		if *ax == 0 {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*ax = 1
		}
		p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
		lo += int64(gc.Widthptr)
		cnt -= int64(gc.Widthptr)
	}

	if cnt == 8 {
		if *ax == 0 {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*ax = 1
		}
		p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
	} else if cnt <= int64(8*gc.Widthreg) {
		if *x0 == 0 {
			p = appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
			*x0 = 1
		}

		for i := int64(0); i < cnt/16; i++ {
			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i*16)
		}

		if cnt%16 != 0 {
			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+cnt-int64(16))
		}
	} else if !gc.Nacl && (cnt <= int64(128*gc.Widthreg)) {
		if *x0 == 0 {
			p = appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
			*x0 = 1
		}

		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))

		if cnt%16 != 0 {
			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
		}
	} else {
		if *ax == 0 {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*ax = 1
		}

		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = int16(ftype)
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = int16(ttype)
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

var panicdiv *gc.Node
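
// Editor's note on the signed-division hazard handled below: on amd64,
// dividing the most negative value by -1 overflows (for example,
// int32(-1<<31) / -1 does not fit in 32 bits) and IDIV raises a divide
// error, so dodiv tests the divisor against -1 and substitutes
// a / -1 = -a and a % -1 = 0 without ever executing the divide instruction.
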
/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := false
	if gc.Issigned[t.Etype] {
		check = true
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -(1<<uint64(t.Width*8-1)) {
			check = false
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = false
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = false
	}

	a := optoas(op, t)

	var n3 gc.Node
	gc.Regalloc(&n3, t0, nil)
	var ax gc.Node
	var oldax gc.Node
	if nl.Ullman >= nr.Ullman {
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
		gc.Regalloc(&ax, t0, &ax) // mark ax live during cgen
		gc.Cgen(nr, &n3)
		gc.Regfree(&ax)
	} else {
		gc.Cgen(nr, &n3)
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
	}

	if t != t0 {
		// Convert
		ax1 := ax

		n31 := n3
		ax.Type = t
		n3.Type = t
		gmove(&ax1, &ax)
		gmove(&n31, &n3)
	}

	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	var p2 *obj.Prog
	if check {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &ax)

			gmove(&ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	if !gc.Issigned[t.Etype] {
		gc.Nodconst(&n4, t, 0)
		gmove(&n4, &dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(a, &n3, nil)
	gc.Regfree(&n3)
	if op == gc.ODIV {
		gmove(&ax, res)
	} else {
		gmove(&dx, res)
	}
	restx(&dx, &olddx)
	if check {
		gc.Patch(p2, gc.Pc)
	}
	restx(&ax, &oldax)
}
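
// Rough shape of the code dodiv emits for a signed 64-bit q = a / b when
// the -1 check is needed (editor's sketch; the AX/DX save and restore done
// by savex/restx is omitted, and register/operand names are illustrative):
//
//	CMPQ	b, $-1
//	JNE	divide
//	NEGQ	AX		// a / -1 == -a
//	MOVQ	AX, q
//	JMP	done
// divide:
//	CQO			// sign-extend AX into DX:AX
//	IDIVQ	b
//	MOVQ	AX, q		// DX would hold the remainder for OMOD
// done:
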
/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it. if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register. caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := uint8(gc.GetReg(dr))

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		gc.Regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		// TODO(marvin): Fix Node.EType type union.
		oldx.Etype = gc.EType(r) // squirrel away old r value
		gc.SetReg(dr, 1)
	}
}

func restx(x *gc.Node, oldx *gc.Node) {
	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT64]
		gc.SetReg(int(x.Reg), int(oldx.Etype))
		gmove(oldx, x)
		gc.Regfree(oldx)
	}
}

/*
 * generate high multiply:
 *	res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type
	a := optoas(gc.OHMUL, t)
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	var n1 gc.Node
	gc.Cgenr(nl, &n1, res)
	var n2 gc.Node
	gc.Cgenr(nr, &n2, nil)
	var ax, oldax, dx, olddx gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, gc.Types[gc.TUINT64])
	savex(x86.REG_DX, &dx, &olddx, res, gc.Types[gc.TUINT64])
	gmove(&n1, &ax)
	gins(a, &n2, nil)
	gc.Regfree(&n2)
	gc.Regfree(&n1)

	if t.Width == 1 {
		// byte multiply behaves differently.
		var byteAH, byteDX gc.Node
		gc.Nodreg(&byteAH, t, x86.REG_AH)
		gc.Nodreg(&byteDX, t, x86.REG_DX)
		gmove(&byteAH, &byteDX)
	}
	gmove(&dx, res)

	restx(&ax, &oldax)
	restx(&dx, &olddx)
}
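
// Editor's sketch of what cgen_hmul above produces for 64-bit operands
// (register names depend on what Cgenr allocates): one operand is moved
// into AX, the one-operand multiply leaves the 128-bit product in DX:AX,
// and the high half in DX becomes the result:
//
//	MOVQ	n1, AX
//	IMULQ	n2	// MULQ for unsigned; DX:AX = AX * n2
//	MOVQ	DX, res
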
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	rcx := gc.GetReg(x86.REG_CX)
	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT64], x86.REG_CX)

	var oldcx gc.Node
	if rcx > 0 && !gc.Samereg(&cx, res) {
		gc.Regalloc(&oldcx, gc.Types[gc.TUINT64], nil)
		gmove(&cx, &oldcx)
	}

	cx.Type = tcount

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)
		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		cx.Type = gc.Types[gc.TUINT64]
		gmove(&oldcx, &cx)
		gc.Regfree(&oldcx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	// generate operands in "8-bit" registers.
	var n1b gc.Node
	gc.Regalloc(&n1b, nl.Type, res)

	gc.Cgen(nl, &n1b)
	var n2b gc.Node
	gc.Regalloc(&n2b, nr.Type, nil)
	gc.Cgen(nr, &n2b)

	// perform full-width multiplication.
	t := gc.Types[gc.TUINT64]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT64]
	}
	var n1 gc.Node
	gc.Nodreg(&n1, t, int(n1b.Reg))
	var n2 gc.Node
	gc.Nodreg(&n2, t, int(n2b.Reg))
	a := optoas(op, t)
	gins(a, &n2, &n1)

	// truncate.
	gmove(&n1, res)

	gc.Regfree(&n1b)
	gc.Regfree(&n2b)
	return true
}
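
// Editor's note on cgen_bmul above: the multiply runs at full register
// width and is only truncated by the final store, so, for example, uint8
// operands 200 and 3 produce 600 in the wide register and 88 (600 mod 256)
// once moved into the byte-sized res.
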
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	w := nl.Type.Width

	if w > 1024 || (gc.Nacl && w >= 64) {
		var oldn1 gc.Node
		var n1 gc.Node
		savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
		gc.Agen(nl, &n1)

		var ax gc.Node
		var oldax gc.Node
		savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
		gconreg(x86.AMOVL, 0, x86.REG_AX)
		gconreg(movptr, w/8, x86.REG_CX)

		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSQ, nil, nil) // STOSQ: *(DI) = AX, DI += 8

		if w%8 != 0 {
			n1.Op = gc.OINDREG
			clearfat_tail(&n1, w%8)
		}

		restx(&n1, &oldn1)
		restx(&ax, &oldax)
		return
	}

	if w >= 64 {
		var oldn1 gc.Node
		var n1 gc.Node
		savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
		gc.Agen(nl, &n1)

		var vec_zero gc.Node
		var old_x0 gc.Node
		savex(x86.REG_X0, &vec_zero, &old_x0, nil, gc.Types[gc.TFLOAT64])
		gins(x86.AXORPS, &vec_zero, &vec_zero)

		if di := dzDI(w); di != 0 {
			gconreg(addptr, di, x86.REG_DI)
		}
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = dzOff(w)

		if w%16 != 0 {
			n1.Op = gc.OINDREG
			n1.Xoffset -= 16 - w%16
			gins(x86.AMOVUPS, &vec_zero, &n1)
		}

		restx(&vec_zero, &old_x0)
		restx(&n1, &oldn1)
		return
	}

	// NOTE: Must use agen, not igen, so that optimizer sees address
	// being taken. We are not writing on field boundaries.
	var n1 gc.Node
	gc.Agenr(nl, &n1, nil)
	n1.Op = gc.OINDREG

	clearfat_tail(&n1, w)

	gc.Regfree(&n1)
}

func clearfat_tail(n1 *gc.Node, b int64) {
	if b >= 16 {
		var vec_zero gc.Node
		gc.Regalloc(&vec_zero, gc.Types[gc.TFLOAT64], nil)
		gins(x86.AXORPS, &vec_zero, &vec_zero)

		for b >= 16 {
			gins(x86.AMOVUPS, &vec_zero, n1)
			n1.Xoffset += 16
			b -= 16
		}

		// MOVUPS X0, off(base) is a few bytes shorter than MOV 0, off(base)
		if b != 0 {
			n1.Xoffset -= 16 - b
			gins(x86.AMOVUPS, &vec_zero, n1)
		}

		gc.Regfree(&vec_zero)
		return
	}

	// Write sequence of MOV 0, off(base) instead of using STOSQ.
	// The hope is that although the code will be slightly longer,
	// the MOVs will have no dependencies and pipeline better
	// than the unrolled STOSQ loop.
	var z gc.Node
	gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
	if b >= 8 {
		n1.Type = z.Type
		gins(x86.AMOVQ, &z, n1)
		n1.Xoffset += 8
		b -= 8

		if b != 0 {
			n1.Xoffset -= 8 - b
			gins(x86.AMOVQ, &z, n1)
		}
		return
	}

	if b >= 4 {
		gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
		n1.Type = z.Type
		gins(x86.AMOVL, &z, n1)
		n1.Xoffset += 4
		b -= 4

		if b != 0 {
			n1.Xoffset -= 4 - b
			gins(x86.AMOVL, &z, n1)
		}
		return
	}

	if b >= 2 {
		gc.Nodconst(&z, gc.Types[gc.TUINT16], 0)
		n1.Type = z.Type
		gins(x86.AMOVW, &z, n1)
		n1.Xoffset += 2
		b -= 2
	}

	gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
	for b > 0 {
		n1.Type = z.Type
		gins(x86.AMOVB, &z, n1)
		n1.Xoffset++
		b--
	}
}
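
// Worked example for clearfat_tail above (editor's note, offsets relative
// to the start of the object): clearing 23 bytes emits MOVUPS X0, 0(base)
// for the first 16 bytes and then, since 7 bytes remain, one overlapping
// MOVUPS X0, 7(base) that rewrites bytes 7-15 and finishes bytes 16-22
// with a single store.
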
// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p.Lineno==1 in generated wrappers
			gc.Warnl(int(p.Lineno), "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = int16(cmpptr)
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = x86.REG_NONE
		}

		p2.To.Offset = 0
	}
}

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
		p1 := gins(x86.ALEAQ, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}
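
// Editor's sketch of what getg above emits (the destination register is
// whatever Regalloc chose; shown here as R for illustration):
//
//	MOVQ	TLS, R
//	MOVQ	(R)(TLS*1), R	// load g from the TLS slot
//
// after which gmove copies R into res. The assembler later rewrites this
// TLS idiom into the appropriate access for the target platform.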