github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/amd64/ggen.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package amd64 6 7 import ( 8 "github.com/bir3/gocompiler/src/cmd/compile/internal/base" 9 "github.com/bir3/gocompiler/src/cmd/compile/internal/ir" 10 "github.com/bir3/gocompiler/src/cmd/compile/internal/objw" 11 "github.com/bir3/gocompiler/src/cmd/compile/internal/types" 12 "github.com/bir3/gocompiler/src/cmd/internal/obj" 13 "github.com/bir3/gocompiler/src/cmd/internal/obj/x86" 14 "github.com/bir3/gocompiler/src/internal/buildcfg" 15 ) 16 17 // no floating point in note handlers on Plan 9 18 var isPlan9 = buildcfg.GOOS == "plan9" 19 20 // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ, 21 // See runtime/mkduff.go. 22 const ( 23 dzBlocks = 16 // number of MOV/ADD blocks 24 dzBlockLen = 4 // number of clears per block 25 dzBlockSize = 23 // size of instructions in a single block 26 dzMovSize = 5 // size of single MOV instruction w/ offset 27 dzLeaqSize = 4 // size of single LEAQ instruction 28 dzClearStep = 16 // number of bytes cleared by each MOV instruction 29 30 dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block 31 dzSize = dzBlocks * dzBlockSize 32 ) 33 34 // dzOff returns the offset for a jump into DUFFZERO. 35 // b is the number of bytes to zero. 36 func dzOff(b int64) int64 { 37 off := int64(dzSize) 38 off -= b / dzClearLen * dzBlockSize 39 tailLen := b % dzClearLen 40 if tailLen >= dzClearStep { 41 off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep) 42 } 43 return off 44 } 45 46 // duffzeroDI returns the pre-adjustment to DI for a call to DUFFZERO. 47 // b is the number of bytes to zero. 48 func dzDI(b int64) int64 { 49 tailLen := b % dzClearLen 50 if tailLen < dzClearStep { 51 return 0 52 } 53 tailSteps := tailLen / dzClearStep 54 return -dzClearStep * (dzBlockLen - tailSteps) 55 } 56 57 func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog { 58 const ( 59 r13 = 1 << iota // if R13 is already zeroed. 60 ) 61 62 if cnt == 0 { 63 return p 64 } 65 66 if cnt%int64(types.RegSize) != 0 { 67 // should only happen with nacl 68 if cnt%int64(types.PtrSize) != 0 { 69 base.Fatalf("zerorange count not a multiple of widthptr %d", cnt) 70 } 71 if *state&r13 == 0 { 72 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0) 73 *state |= r13 74 } 75 p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off) 76 off += int64(types.PtrSize) 77 cnt -= int64(types.PtrSize) 78 } 79 80 if cnt == 8 { 81 if *state&r13 == 0 { 82 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0) 83 *state |= r13 84 } 85 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off) 86 } else if !isPlan9 && cnt <= int64(8*types.RegSize) { 87 for i := int64(0); i < cnt/16; i++ { 88 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16) 89 } 90 91 if cnt%16 != 0 { 92 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16)) 93 } 94 } else if !isPlan9 && (cnt <= int64(128*types.RegSize)) { 95 // Save DI to r12. With the amd64 Go register abi, DI can contain 96 // an incoming parameter, whereas R12 is always scratch. 97 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0) 98 // Emit duffzero call 99 p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0) 100 p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt)) 101 p.To.Sym = ir.Syms.Duffzero 102 if cnt%16 != 0 { 103 p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8)) 104 } 105 // Restore DI from r12 106 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) 107 108 } else { 109 // When the register ABI is in effect, at this point in the 110 // prolog we may have live values in all of RAX,RDI,RCX. Save 111 // them off to registers before the REPSTOSQ below, then 112 // restore. Note that R12 and R13 are always available as 113 // scratch regs; here we also use R15 (this is safe to do 114 // since there won't be any globals accessed in the prolog). 115 // See rewriteToUseGot() in obj6.go for more on r15 use. 116 117 // Save rax/rdi/rcx 118 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0) 119 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0) 120 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0) 121 122 // Set up the REPSTOSQ and kick it off. 123 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) 124 p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0) 125 p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0) 126 p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) 127 p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) 128 129 // Restore rax/rdi/rcx 130 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) 131 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0) 132 p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0) 133 134 // Record the fact that r13 is no longer zero. 135 *state &= ^uint32(r13) 136 } 137 138 return p 139 } 140 141 func ginsnop(pp *objw.Progs) *obj.Prog { 142 // This is a hardware nop (1-byte 0x90) instruction, 143 // even though we describe it as an explicit XCHGL here. 144 // Particularly, this does not zero the high 32 bits 145 // like typical *L opcodes. 146 // (gas assembles "xchg %eax,%eax" to 0x87 0xc0, which 147 // does zero the high 32 bits.) 148 p := pp.Prog(x86.AXCHGL) 149 p.From.Type = obj.TYPE_REG 150 p.From.Reg = x86.REG_AX 151 p.To.Type = obj.TYPE_REG 152 p.To.Reg = x86.REG_AX 153 return p 154 }