github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/amd64/ggen.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// no floating point in note handlers on Plan 9
var isPlan9 = obj.Getgoos() == "plan9"

func defframe(ptxt *obj.Prog) {
	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.ArgWidth(), int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)
	x0 := uint32(0)

	// iterate through declarations - they are sorted in decreasing xoffset order.
	for _, n := range gc.Curfn.Func.Dcl {
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatalf("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatalf("var %v has size %d offset %d", gc.Nconv(n, gc.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}

		if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthreg) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax, &x0)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax, &x0)
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD;
// see runtime/mkduff.go.
const (
	dzBlocks    = 16 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 19 // size of instructions in a single block
	dzMovSize   = 4  // size of single MOV instruction w/ offset
	dzAddSize   = 4  // size of single ADD instruction
	dzClearStep = 16 // number of bytes cleared by each MOV instruction

	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
	dzSize     = dzBlocks * dzBlockSize
)

// dzOff returns the offset for a jump into DUFFZERO.
// b is the number of bytes to zero.
func dzOff(b int64) int64 {
	off := int64(dzSize)
	off -= b / dzClearLen * dzBlockSize
	tailLen := b % dzClearLen
	if tailLen >= dzClearStep {
		off -= dzAddSize + dzMovSize*(tailLen/dzClearStep)
	}
	return off
}

// dzDI returns the pre-adjustment to DI for a call to DUFFZERO.
// b is the number of bytes to zero.
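//
// For example, with the block layout described above, dzOff(96) = 273 and
// dzDI(96) = -32: the caller points DI 32 bytes before the region to be
// cleared and jumps 31 bytes before the end of DUFFZERO's MOV/ADD blocks,
// so the tail of one block (the MOVUPSs at 32(DI) and 48(DI), then the ADD)
// zeroes the first 32 bytes and a final full block zeroes the remaining 64.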
func dzDI(b int64) int64 {
	tailLen := b % dzClearLen
	if tailLen < dzClearStep {
		return 0
	}
	tailSteps := tailLen / dzClearStep
	return -dzClearStep * (dzBlockLen - tailSteps)
}

// zerorange generates code to zero the stack bytes at offsets frame+lo
// through frame+hi from SP. *ax and *x0 record whether AX and X0 already
// hold zero, so the zero value is only materialized once per frame.
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32, x0 *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}

	if cnt%int64(gc.Widthreg) != 0 {
		// should only happen with nacl
		if cnt%int64(gc.Widthptr) != 0 {
			gc.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
		}
		if *ax == 0 {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*ax = 1
		}
		p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
		lo += int64(gc.Widthptr)
		cnt -= int64(gc.Widthptr)
	}

	if cnt == 8 {
		if *ax == 0 {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*ax = 1
		}
		p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo)
	} else if !isPlan9 && cnt <= int64(8*gc.Widthreg) {
		if *x0 == 0 {
			p = appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
			*x0 = 1
		}

		for i := int64(0); i < cnt/16; i++ {
			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i*16)
		}

		if cnt%16 != 0 {
			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+cnt-int64(16))
		}
	} else if !gc.Nacl && !isPlan9 && (cnt <= int64(128*gc.Widthreg)) {
		if *x0 == 0 {
			p = appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
			*x0 = 1
		}
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))

		if cnt%16 != 0 {
			p = appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
		}
	} else {
		if *ax == 0 {
			p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*ax = 1
		}

		p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

// appendpp inserts a new instruction with the given opcode and operands
// immediately after p and returns it.
func appendpp(p *obj.Prog, as obj.As, ftype obj.AddrType, freg int, foffset int64, ttype obj.AddrType, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = as
	q.Lineno = p.Lineno
	q.From.Type = ftype
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = ttype
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

var panicdiv *gc.Node

/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
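	// (IDIV raises the same #DE fault for the INT_MIN / -1 overflow as it
	// does for a divide by zero, so the overflow case needs the software
	// check below.)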
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := false
	if t.IsSigned() {
		check = true
		if gc.Isconst(nl, gc.CTINT) && nl.Int64() != -(1<<uint64(t.Width*8-1)) {
			check = false
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int64() != -1 {
			check = false
		}
	}

	if t.Width < 4 {
		if t.IsSigned() {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = false
	}

	a := optoas(op, t)

	var n3 gc.Node
	gc.Regalloc(&n3, t0, nil)
	var ax gc.Node
	var oldax gc.Node
	if nl.Ullman >= nr.Ullman {
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
		gc.Regalloc(&ax, t0, &ax) // mark ax live during cgen
		gc.Cgen(nr, &n3)
		gc.Regfree(&ax)
	} else {
		gc.Cgen(nr, &n3)
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
	}

	if t != t0 {
		// Convert
		ax1 := ax

		n31 := n3
		ax.Type = t
		n3.Type = t
		gmove(&ax1, &ax)
		gmove(&n31, &n3)
	}

	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	var p2 *obj.Prog
	if check {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &ax)

			gmove(&ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	if !t.IsSigned() {
		gc.Nodconst(&n4, t, 0)
		gmove(&n4, &dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(a, &n3, nil)
	gc.Regfree(&n3)
	if op == gc.ODIV {
		gmove(&ax, res)
	} else {
		gmove(&dx, res)
	}
	restx(&dx, &olddx)
	if check {
		gc.Patch(p2, gc.Pc)
	}
	restx(&ax, &oldax)
}

/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it. if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register. caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := uint8(gc.GetReg(dr))

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		gc.Regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		// TODO(marvin): Fix Node.EType type union.
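		// The old reference count r is stashed in oldx.Etype so that restx
		// can pass it back to gc.SetReg; oldx is a freshly allocated
		// temporary, so the field is otherwise unused here.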
		oldx.Etype = gc.EType(r) // squirrel away old r value
		gc.SetReg(dr, 1)
	}
}

// restx undoes savex: it moves the value saved in oldx back into x,
// restores the register's old allocation count, and frees the temporary.
func restx(x *gc.Node, oldx *gc.Node) {
	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT64]
		gc.SetReg(int(x.Reg), int(oldx.Etype))
		gmove(oldx, x)
		gc.Regfree(oldx)
	}
}

/*
 * generate high multiply:
 *	res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type
	a := optoas(gc.OHMUL, t)
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	var n1 gc.Node
	gc.Cgenr(nl, &n1, res)
	var n2 gc.Node
	gc.Cgenr(nr, &n2, nil)
	var ax, oldax, dx, olddx gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, gc.Types[gc.TUINT64])
	savex(x86.REG_DX, &dx, &olddx, res, gc.Types[gc.TUINT64])
	gmove(&n1, &ax)
	gins(a, &n2, nil)
	gc.Regfree(&n2)
	gc.Regfree(&n1)

	if t.Width == 1 {
		// byte multiply behaves differently.
		var byteAH, byteDX gc.Node
		gc.Nodreg(&byteAH, t, x86.REG_AH)
		gc.Nodreg(&byteDX, t, x86.REG_DX)
		gmove(&byteAH, &byteDX)
	}
	gmove(&dx, res)

	restx(&ax, &oldax)
	restx(&dx, &olddx)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int64())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	rcx := gc.GetReg(x86.REG_CX)
	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT64], x86.REG_CX)

	var oldcx gc.Node
	if rcx > 0 && !gc.Samereg(&cx, res) {
		gc.Regalloc(&oldcx, gc.Types[gc.TUINT64], nil)
		gmove(&cx, &oldcx)
	}

	cx.Type = tcount

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)
		if op == gc.ORSH && nl.Type.IsSigned() {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		cx.Type = gc.Types[gc.TUINT64]
		gmove(&oldcx, &cx)
		gc.Regfree(&oldcx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	// generate operands in "8-bit" registers.
	var n1b gc.Node
	gc.Regalloc(&n1b, nl.Type, res)

	gc.Cgen(nl, &n1b)
	var n2b gc.Node
	gc.Regalloc(&n2b, nr.Type, nil)
	gc.Cgen(nr, &n2b)

	// perform full-width multiplication.
	t := gc.Types[gc.TUINT64]

	if nl.Type.IsSigned() {
		t = gc.Types[gc.TINT64]
	}
	var n1 gc.Node
	gc.Nodreg(&n1, t, int(n1b.Reg))
	var n2 gc.Node
	gc.Nodreg(&n2, t, int(n2b.Reg))
	a := optoas(op, t)
	gins(a, &n2, &n1)

	// truncate.
	gmove(&n1, res)

	gc.Regfree(&n1b)
	gc.Regfree(&n2b)
	return true
}

func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	// Avoid taking the address for simple enough types.
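	// (Componentgen zeroes nl field by field when it can do so directly,
	// so no pointer to nl needs to be materialized.)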
	if gc.Componentgen(nil, nl) {
		return
	}

	w := nl.Type.Width

	if w > 1024 || (w >= 64 && (gc.Nacl || isPlan9)) {
		var oldn1 gc.Node
		var n1 gc.Node
		savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
		gc.Agen(nl, &n1)

		var ax gc.Node
		var oldax gc.Node
		savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
		gconreg(x86.AMOVL, 0, x86.REG_AX)
		gconreg(movptr, w/8, x86.REG_CX)

		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSQ, nil, nil) // STOSQ AX,*(DI)+

		if w%8 != 0 {
			n1.Op = gc.OINDREG
			clearfat_tail(&n1, w%8)
		}

		restx(&n1, &oldn1)
		restx(&ax, &oldax)
		return
	}

	if w >= 64 {
		var oldn1 gc.Node
		var n1 gc.Node
		savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
		gc.Agen(nl, &n1)

		var vec_zero gc.Node
		var old_x0 gc.Node
		savex(x86.REG_X0, &vec_zero, &old_x0, nil, gc.Types[gc.TFLOAT64])
		gins(x86.AXORPS, &vec_zero, &vec_zero)

		if di := dzDI(w); di != 0 {
			gconreg(addptr, di, x86.REG_DI)
		}
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = dzOff(w)

		if w%16 != 0 {
			n1.Op = gc.OINDREG
			n1.Xoffset -= 16 - w%16
			gins(x86.AMOVUPS, &vec_zero, &n1)
		}

		restx(&vec_zero, &old_x0)
		restx(&n1, &oldn1)
		return
	}

	// NOTE: Must use agen, not igen, so that optimizer sees address
	// being taken. We are not writing on field boundaries.
	var n1 gc.Node
	gc.Agenr(nl, &n1, nil)
	n1.Op = gc.OINDREG

	clearfat_tail(&n1, w)

	gc.Regfree(&n1)
}

// clearfat_tail zeroes b bytes of memory starting at the address described
// by n1 (an OINDREG node), using progressively narrower stores.
func clearfat_tail(n1 *gc.Node, b int64) {
	if b >= 16 && isPlan9 {
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		q := b / 8
		for ; q > 0; q-- {
			n1.Type = z.Type
			gins(x86.AMOVQ, &z, n1)
			n1.Xoffset += 8
			b -= 8
		}
		if b != 0 {
			n1.Xoffset -= 8 - b
			gins(x86.AMOVQ, &z, n1)
		}
		return
	}
	if b >= 16 {
		var vec_zero gc.Node
		gc.Regalloc(&vec_zero, gc.Types[gc.TFLOAT64], nil)
		gins(x86.AXORPS, &vec_zero, &vec_zero)

		for b >= 16 {
			gins(x86.AMOVUPS, &vec_zero, n1)
			n1.Xoffset += 16
			b -= 16
		}

		// MOVUPS X0, off(base) is a few bytes shorter than MOV 0, off(base)
		if b != 0 {
			n1.Xoffset -= 16 - b
			gins(x86.AMOVUPS, &vec_zero, n1)
		}

		gc.Regfree(&vec_zero)
		return
	}

	// Write sequence of MOV 0, off(base) instead of using STOSQ.
	// The hope is that although the code will be slightly longer,
	// the MOVs will have no dependencies and pipeline better
	// than the unrolled STOSQ loop.
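	// For example, b = 7 is written as a MOVL at offset 0 plus an overlapping
	// MOVL at offset 3, and b = 3 as a MOVW at offset 0 plus a MOVB at offset 2.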
	var z gc.Node
	gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
	if b >= 8 {
		n1.Type = z.Type
		gins(x86.AMOVQ, &z, n1)
		n1.Xoffset += 8
		b -= 8

		if b != 0 {
			n1.Xoffset -= 8 - b
			gins(x86.AMOVQ, &z, n1)
		}
		return
	}

	if b >= 4 {
		gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
		n1.Type = z.Type
		gins(x86.AMOVL, &z, n1)
		n1.Xoffset += 4
		b -= 4

		if b != 0 {
			n1.Xoffset -= 4 - b
			gins(x86.AMOVL, &z, n1)
		}
		return
	}

	if b >= 2 {
		gc.Nodconst(&z, gc.Types[gc.TUINT16], 0)
		n1.Type = z.Type
		gins(x86.AMOVW, &z, n1)
		n1.Xoffset += 2
		b -= 2
	}

	gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
	for b > 0 {
		n1.Type = z.Type
		gins(x86.AMOVB, &z, n1)
		n1.Xoffset++
		b--
	}

}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(p.Lineno, "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = cmpptr
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = x86.REG_NONE
		}

		p2.To.Offset = 0
	}
}

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
		p1 := gins(x86.ALEAQ, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}