github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go (about) 1 package amd64 2 3 import ( 4 "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" 5 "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" 6 ) 7 8 // PostRegAlloc implements backend.Machine. 9 func (m *machine) PostRegAlloc() { 10 m.setupPrologue() 11 m.postRegAlloc() 12 } 13 14 func (m *machine) setupPrologue() { 15 cur := m.ectx.RootInstr 16 prevInitInst := cur.next 17 18 // At this point, we have the stack layout as follows: 19 // 20 // (high address) 21 // +-----------------+ <----- RBP (somewhere in the middle of the stack) 22 // | ....... | 23 // | ret Y | 24 // | ....... | 25 // | ret 0 | 26 // | arg X | 27 // | ....... | 28 // | arg 1 | 29 // | arg 0 | 30 // | Return Addr | 31 // RSP ----> +-----------------+ 32 // (low address) 33 34 // First, we push the RBP, and update the RBP to the current RSP. 35 // 36 // (high address) (high address) 37 // RBP ----> +-----------------+ +-----------------+ 38 // | ....... | | ....... | 39 // | ret Y | | ret Y | 40 // | ....... | | ....... | 41 // | ret 0 | | ret 0 | 42 // | arg X | | arg X | 43 // | ....... | ====> | ....... | 44 // | arg 1 | | arg 1 | 45 // | arg 0 | | arg 0 | 46 // | Return Addr | | Return Addr | 47 // RSP ----> +-----------------+ | Caller_RBP | 48 // (low address) +-----------------+ <----- RSP, RBP 49 // 50 cur = m.setupRBPRSP(cur) 51 52 if !m.stackBoundsCheckDisabled { 53 cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur) 54 } 55 56 // 57 // (high address) 58 // +-----------------+ +-----------------+ 59 // | ....... | | ....... | 60 // | ret Y | | ret Y | 61 // | ....... | | ....... | 62 // | ret 0 | | ret 0 | 63 // | arg X | | arg X | 64 // | ....... | | ....... | 65 // | arg 1 | | arg 1 | 66 // | arg 0 | | arg 0 | 67 // | xxxxx | | xxxxx | 68 // | Return Addr | | Return Addr | 69 // | Caller_RBP | ====> | Caller_RBP | 70 // RBP,RSP->+-----------------+ +-----------------+ <----- RBP 71 // (low address) | clobbered M | 72 // | clobbered 1 | 73 // | ........... | 74 // | clobbered 0 | 75 // +-----------------+ <----- RSP 76 // 77 if regs := m.clobberedRegs; len(regs) > 0 { 78 for i := range regs { 79 r := regs[len(regs)-1-i] // Reverse order. 80 if r.RegType() == regalloc.RegTypeInt { 81 cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r))) 82 } else { 83 // Push the XMM register is not supported by the PUSH instruction. 84 cur = m.addRSP(-16, cur) 85 push := m.allocateInstr().asXmmMovRM( 86 sseOpcodeMovdqu, r, newOperandMem(m.newAmodeImmReg(0, rspVReg)), 87 ) 88 cur = linkInstr(cur, push) 89 } 90 } 91 } 92 93 if size := m.spillSlotSize; size > 0 { 94 // Simply decrease the RSP to allocate the spill slots. 95 // sub $size, %rsp 96 cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(size)), rspVReg, true)) 97 98 // At this point, we have the stack layout as follows: 99 // 100 // (high address) 101 // +-----------------+ 102 // | ....... | 103 // | ret Y | 104 // | ....... | 105 // | ret 0 | 106 // | arg X | 107 // | ....... | 108 // | arg 1 | 109 // | arg 0 | 110 // | ReturnAddress | 111 // | Caller_RBP | 112 // +-----------------+ <--- RBP 113 // | clobbered M | 114 // | ............ | 115 // | clobbered 1 | 116 // | clobbered 0 | 117 // | spill slot N | 118 // | ............ | 119 // | spill slot 0 | 120 // +-----------------+ <--- RSP 121 // (low address) 122 } 123 124 linkInstr(cur, prevInitInst) 125 } 126 127 // postRegAlloc does multiple things while walking through the instructions: 128 // 1. Inserts the epilogue code. 129 // 2. Removes the redundant copy instruction. 130 // 3. Inserts the dec/inc RSP instruction right before/after the call instruction. 131 // 4. Lowering that is supposed to be done after regalloc. 132 func (m *machine) postRegAlloc() { 133 ectx := m.ectx 134 for cur := ectx.RootInstr; cur != nil; cur = cur.next { 135 switch k := cur.kind; k { 136 case ret: 137 m.setupEpilogueAfter(cur.prev) 138 continue 139 case fcvtToSintSequence, fcvtToUintSequence: 140 m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] 141 if k == fcvtToSintSequence { 142 m.lowerFcvtToSintSequenceAfterRegalloc(cur) 143 } else { 144 m.lowerFcvtToUintSequenceAfterRegalloc(cur) 145 } 146 prev := cur.prev 147 next := cur.next 148 cur := prev 149 for _, instr := range m.ectx.PendingInstructions { 150 cur = linkInstr(cur, instr) 151 } 152 linkInstr(cur, next) 153 continue 154 case xmmCMov: 155 m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] 156 m.lowerXmmCmovAfterRegAlloc(cur) 157 prev := cur.prev 158 next := cur.next 159 cur := prev 160 for _, instr := range m.ectx.PendingInstructions { 161 cur = linkInstr(cur, instr) 162 } 163 linkInstr(cur, next) 164 continue 165 case idivRemSequence: 166 m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] 167 m.lowerIDivRemSequenceAfterRegAlloc(cur) 168 prev := cur.prev 169 next := cur.next 170 cur := prev 171 for _, instr := range m.ectx.PendingInstructions { 172 cur = linkInstr(cur, instr) 173 } 174 linkInstr(cur, next) 175 continue 176 case call, callIndirect: 177 // At this point, reg alloc is done, therefore we can safely insert dec/inc RPS instruction 178 // right before/after the call instruction. If this is done before reg alloc, the stack slot 179 // can point to the wrong location and therefore results in a wrong value. 180 call := cur 181 next := call.next 182 _, _, _, _, size := backend.ABIInfoFromUint64(call.u2) 183 if size > 0 { 184 dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true) 185 linkInstr(call.prev, dec) 186 linkInstr(dec, call) 187 inc := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(size), rspVReg, true) 188 linkInstr(call, inc) 189 linkInstr(inc, next) 190 } 191 continue 192 } 193 194 // Removes the redundant copy instruction. 195 if cur.IsCopy() && cur.op1.reg().RealReg() == cur.op2.reg().RealReg() { 196 prev, next := cur.prev, cur.next 197 // Remove the copy instruction. 198 prev.next = next 199 if next != nil { 200 next.prev = prev 201 } 202 } 203 } 204 } 205 206 func (m *machine) setupEpilogueAfter(cur *instruction) { 207 prevNext := cur.next 208 209 // At this point, we have the stack layout as follows: 210 // 211 // (high address) 212 // +-----------------+ 213 // | ....... | 214 // | ret Y | 215 // | ....... | 216 // | ret 0 | 217 // | arg X | 218 // | ....... | 219 // | arg 1 | 220 // | arg 0 | 221 // | ReturnAddress | 222 // | Caller_RBP | 223 // +-----------------+ <--- RBP 224 // | clobbered M | 225 // | ............ | 226 // | clobbered 1 | 227 // | clobbered 0 | 228 // | spill slot N | 229 // | ............ | 230 // | spill slot 0 | 231 // +-----------------+ <--- RSP 232 // (low address) 233 234 if size := m.spillSlotSize; size > 0 { 235 // Simply increase the RSP to free the spill slots. 236 // add $size, %rsp 237 cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(uint32(size)), rspVReg, true)) 238 } 239 240 // 241 // (high address) 242 // +-----------------+ +-----------------+ 243 // | ....... | | ....... | 244 // | ret Y | | ret Y | 245 // | ....... | | ....... | 246 // | ret 0 | | ret 0 | 247 // | arg X | | arg X | 248 // | ....... | | ....... | 249 // | arg 1 | | arg 1 | 250 // | arg 0 | | arg 0 | 251 // | ReturnAddress | | ReturnAddress | 252 // | Caller_RBP | | Caller_RBP | 253 // RBP ---> +-----------------+ ========> +-----------------+ <---- RSP, RBP 254 // | clobbered M | 255 // | ............ | 256 // | clobbered 1 | 257 // | clobbered 0 | 258 // RSP ---> +-----------------+ 259 // (low address) 260 // 261 if regs := m.clobberedRegs; len(regs) > 0 { 262 for _, r := range regs { 263 if r.RegType() == regalloc.RegTypeInt { 264 cur = linkInstr(cur, m.allocateInstr().asPop64(r)) 265 } else { 266 // Pop the XMM register is not supported by the POP instruction. 267 pop := m.allocateInstr().asXmmUnaryRmR( 268 sseOpcodeMovdqu, newOperandMem(m.newAmodeImmReg(0, rspVReg)), r, 269 ) 270 cur = linkInstr(cur, pop) 271 cur = m.addRSP(16, cur) 272 } 273 } 274 } 275 276 // Now roll back the RSP to RBP, and pop the caller's RBP. 277 cur = m.revertRBPRSP(cur) 278 279 linkInstr(cur, prevNext) 280 } 281 282 func (m *machine) addRSP(offset int32, cur *instruction) *instruction { 283 if offset == 0 { 284 return cur 285 } 286 opcode := aluRmiROpcodeAdd 287 if offset < 0 { 288 opcode = aluRmiROpcodeSub 289 offset = -offset 290 } 291 return linkInstr(cur, m.allocateInstr().asAluRmiR(opcode, newOperandImm32(uint32(offset)), rspVReg, true)) 292 } 293 294 func (m *machine) setupRBPRSP(cur *instruction) *instruction { 295 cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(rbpVReg))) 296 cur = linkInstr(cur, m.allocateInstr().asMovRR(rspVReg, rbpVReg, true)) 297 return cur 298 } 299 300 func (m *machine) revertRBPRSP(cur *instruction) *instruction { 301 cur = linkInstr(cur, m.allocateInstr().asMovRR(rbpVReg, rspVReg, true)) 302 cur = linkInstr(cur, m.allocateInstr().asPop64(rbpVReg)) 303 return cur 304 }