github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go (about) 1 package amd64 2 3 import ( 4 "fmt" 5 6 "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" 7 "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" 8 "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" 9 "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" 10 ) 11 12 func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) { 13 switch kind := i.kind; kind { 14 case nop0, sourceOffsetInfo, defineUninitializedReg, fcvtToSintSequence, fcvtToUintSequence, nopUseReg: 15 case ret: 16 encodeRet(c) 17 case imm: 18 dst := regEncodings[i.op2.reg().RealReg()] 19 con := i.u1 20 if i.b1 { // 64 bit. 21 if lower32willSignExtendTo64(con) { 22 // Sign extend mov(imm32). 23 encodeRegReg(c, 24 legacyPrefixesNone, 25 0xc7, 1, 26 0, 27 dst, 28 rexInfo(0).setW(), 29 ) 30 c.Emit4Bytes(uint32(con)) 31 } else { 32 c.EmitByte(rexEncodingW | dst.rexBit()) 33 c.EmitByte(0xb8 | dst.encoding()) 34 c.Emit8Bytes(con) 35 } 36 } else { 37 if dst.rexBit() > 0 { 38 c.EmitByte(rexEncodingDefault | 0x1) 39 } 40 c.EmitByte(0xb8 | dst.encoding()) 41 c.Emit4Bytes(uint32(con)) 42 } 43 44 case aluRmiR: 45 var rex rexInfo 46 if i.b1 { 47 rex = rex.setW() 48 } else { 49 rex = rex.clearW() 50 } 51 52 dst := regEncodings[i.op2.reg().RealReg()] 53 54 aluOp := aluRmiROpcode(i.u1) 55 if aluOp == aluRmiROpcodeMul { 56 op1 := i.op1 57 const regMemOpc, regMemOpcNum = 0x0FAF, 2 58 switch op1.kind { 59 case operandKindReg: 60 src := regEncodings[op1.reg().RealReg()] 61 encodeRegReg(c, legacyPrefixesNone, regMemOpc, regMemOpcNum, dst, src, rex) 62 case operandKindMem: 63 m := i.op1.addressMode() 64 encodeRegMem(c, legacyPrefixesNone, regMemOpc, regMemOpcNum, dst, m, rex) 65 case operandKindImm32: 66 imm8 := lower8willSignExtendTo32(op1.imm32()) 67 var opc uint32 68 if imm8 { 69 opc = 0x6b 70 } else { 71 opc = 0x69 72 } 73 encodeRegReg(c, legacyPrefixesNone, opc, 1, dst, dst, rex) 74 if imm8 { 75 c.EmitByte(byte(op1.imm32())) 76 } else { 77 c.Emit4Bytes(op1.imm32()) 78 } 79 default: 80 panic("BUG: invalid operand kind") 81 } 82 } else { 83 const opcodeNum = 1 84 var opcR, opcM, subOpcImm uint32 85 switch aluOp { 86 case aluRmiROpcodeAdd: 87 opcR, opcM, subOpcImm = 0x01, 0x03, 0x0 88 case aluRmiROpcodeSub: 89 opcR, opcM, subOpcImm = 0x29, 0x2b, 0x5 90 case aluRmiROpcodeAnd: 91 opcR, opcM, subOpcImm = 0x21, 0x23, 0x4 92 case aluRmiROpcodeOr: 93 opcR, opcM, subOpcImm = 0x09, 0x0b, 0x1 94 case aluRmiROpcodeXor: 95 opcR, opcM, subOpcImm = 0x31, 0x33, 0x6 96 default: 97 panic("BUG: invalid aluRmiROpcode") 98 } 99 100 op1 := i.op1 101 switch op1.kind { 102 case operandKindReg: 103 src := regEncodings[op1.reg().RealReg()] 104 encodeRegReg(c, legacyPrefixesNone, opcR, opcodeNum, src, dst, rex) 105 case operandKindMem: 106 m := i.op1.addressMode() 107 encodeRegMem(c, legacyPrefixesNone, opcM, opcodeNum, dst, m, rex) 108 case operandKindImm32: 109 imm8 := lower8willSignExtendTo32(op1.imm32()) 110 var opc uint32 111 if imm8 { 112 opc = 0x83 113 } else { 114 opc = 0x81 115 } 116 encodeRegReg(c, legacyPrefixesNone, opc, opcodeNum, regEnc(subOpcImm), dst, rex) 117 if imm8 { 118 c.EmitByte(byte(op1.imm32())) 119 } else { 120 c.Emit4Bytes(op1.imm32()) 121 } 122 default: 123 panic("BUG: invalid operand kind") 124 } 125 } 126 127 case movRR: 128 src := regEncodings[i.op1.reg().RealReg()] 129 dst := regEncodings[i.op2.reg().RealReg()] 130 var rex rexInfo 131 if i.b1 { 132 rex = rex.setW() 133 } else { 134 rex = rex.clearW() 135 } 136 encodeRegReg(c, legacyPrefixesNone, 0x89, 1, src, dst, rex) 137 138 case xmmRmR, blendvpd: 139 op := sseOpcode(i.u1) 140 var legPrex legacyPrefixes 141 var opcode uint32 142 var opcodeNum uint32 143 switch op { 144 case sseOpcodeAddps: 145 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F58, 2 146 case sseOpcodeAddpd: 147 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F58, 2 148 case sseOpcodeAddss: 149 legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F58, 2 150 case sseOpcodeAddsd: 151 legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F58, 2 152 case sseOpcodeAndps: 153 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F54, 2 154 case sseOpcodeAndpd: 155 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F54, 2 156 case sseOpcodeAndnps: 157 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F55, 2 158 case sseOpcodeAndnpd: 159 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F55, 2 160 case sseOpcodeBlendvps: 161 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3814, 3 162 case sseOpcodeBlendvpd: 163 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3815, 3 164 case sseOpcodeDivps: 165 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5E, 2 166 case sseOpcodeDivpd: 167 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5E, 2 168 case sseOpcodeDivss: 169 legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5E, 2 170 case sseOpcodeDivsd: 171 legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5E, 2 172 case sseOpcodeMaxps: 173 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5F, 2 174 case sseOpcodeMaxpd: 175 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5F, 2 176 case sseOpcodeMaxss: 177 legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5F, 2 178 case sseOpcodeMaxsd: 179 legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5F, 2 180 case sseOpcodeMinps: 181 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5D, 2 182 case sseOpcodeMinpd: 183 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5D, 2 184 case sseOpcodeMinss: 185 legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5D, 2 186 case sseOpcodeMinsd: 187 legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5D, 2 188 case sseOpcodeMovlhps: 189 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F16, 2 190 case sseOpcodeMovsd: 191 legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F10, 2 192 case sseOpcodeMulps: 193 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F59, 2 194 case sseOpcodeMulpd: 195 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F59, 2 196 case sseOpcodeMulss: 197 legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F59, 2 198 case sseOpcodeMulsd: 199 legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F59, 2 200 case sseOpcodeOrpd: 201 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F56, 2 202 case sseOpcodeOrps: 203 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F56, 2 204 case sseOpcodePackssdw: 205 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F6B, 2 206 case sseOpcodePacksswb: 207 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F63, 2 208 case sseOpcodePackusdw: 209 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F382B, 3 210 case sseOpcodePackuswb: 211 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F67, 2 212 case sseOpcodePaddb: 213 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFC, 2 214 case sseOpcodePaddd: 215 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFE, 2 216 case sseOpcodePaddq: 217 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD4, 2 218 case sseOpcodePaddw: 219 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFD, 2 220 case sseOpcodePaddsb: 221 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEC, 2 222 case sseOpcodePaddsw: 223 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FED, 2 224 case sseOpcodePaddusb: 225 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDC, 2 226 case sseOpcodePaddusw: 227 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDD, 2 228 case sseOpcodePand: 229 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDB, 2 230 case sseOpcodePandn: 231 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDF, 2 232 case sseOpcodePavgb: 233 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE0, 2 234 case sseOpcodePavgw: 235 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE3, 2 236 case sseOpcodePcmpeqb: 237 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F74, 2 238 case sseOpcodePcmpeqw: 239 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F75, 2 240 case sseOpcodePcmpeqd: 241 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F76, 2 242 case sseOpcodePcmpeqq: 243 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3829, 3 244 case sseOpcodePcmpgtb: 245 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F64, 2 246 case sseOpcodePcmpgtw: 247 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F65, 2 248 case sseOpcodePcmpgtd: 249 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F66, 2 250 case sseOpcodePcmpgtq: 251 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3837, 3 252 case sseOpcodePmaddwd: 253 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF5, 2 254 case sseOpcodePmaxsb: 255 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383C, 3 256 case sseOpcodePmaxsw: 257 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEE, 2 258 case sseOpcodePmaxsd: 259 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383D, 3 260 case sseOpcodePmaxub: 261 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDE, 2 262 case sseOpcodePmaxuw: 263 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383E, 3 264 case sseOpcodePmaxud: 265 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383F, 3 266 case sseOpcodePminsb: 267 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3838, 3 268 case sseOpcodePminsw: 269 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEA, 2 270 case sseOpcodePminsd: 271 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3839, 3 272 case sseOpcodePminub: 273 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDA, 2 274 case sseOpcodePminuw: 275 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383A, 3 276 case sseOpcodePminud: 277 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383B, 3 278 case sseOpcodePmulld: 279 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3840, 3 280 case sseOpcodePmullw: 281 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD5, 2 282 case sseOpcodePmuludq: 283 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF4, 2 284 case sseOpcodePor: 285 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEB, 2 286 case sseOpcodePshufb: 287 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3800, 3 288 case sseOpcodePsubb: 289 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF8, 2 290 case sseOpcodePsubd: 291 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFA, 2 292 case sseOpcodePsubq: 293 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFB, 2 294 case sseOpcodePsubw: 295 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF9, 2 296 case sseOpcodePsubsb: 297 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE8, 2 298 case sseOpcodePsubsw: 299 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE9, 2 300 case sseOpcodePsubusb: 301 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD8, 2 302 case sseOpcodePsubusw: 303 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD9, 2 304 case sseOpcodePunpckhbw: 305 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F68, 2 306 case sseOpcodePunpcklbw: 307 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F60, 2 308 case sseOpcodePxor: 309 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEF, 2 310 case sseOpcodeSubps: 311 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5C, 2 312 case sseOpcodeSubpd: 313 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5C, 2 314 case sseOpcodeSubss: 315 legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5C, 2 316 case sseOpcodeSubsd: 317 legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5C, 2 318 case sseOpcodeXorps: 319 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F57, 2 320 case sseOpcodeXorpd: 321 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F57, 2 322 case sseOpcodePmulhrsw: 323 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F380B, 3 324 case sseOpcodeUnpcklps: 325 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F14, 2 326 case sseOpcodePmaddubsw: 327 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3804, 3 328 default: 329 if kind == blendvpd { 330 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3815, 3 331 } else { 332 panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) 333 } 334 } 335 336 dst := regEncodings[i.op2.reg().RealReg()] 337 338 rex := rexInfo(0).clearW() 339 op1 := i.op1 340 if op1.kind == operandKindReg { 341 src := regEncodings[op1.reg().RealReg()] 342 encodeRegReg(c, legPrex, opcode, opcodeNum, dst, src, rex) 343 } else if i.op1.kind == operandKindMem { 344 m := i.op1.addressMode() 345 encodeRegMem(c, legPrex, opcode, opcodeNum, dst, m, rex) 346 } else { 347 panic("BUG: invalid operand kind") 348 } 349 350 case gprToXmm: 351 var legPrefix legacyPrefixes 352 var opcode uint32 353 const opcodeNum = 2 354 switch sseOpcode(i.u1) { 355 case sseOpcodeMovd, sseOpcodeMovq: 356 legPrefix, opcode = legacyPrefixes0x66, 0x0f6e 357 case sseOpcodeCvtsi2ss: 358 legPrefix, opcode = legacyPrefixes0xF3, 0x0f2a 359 case sseOpcodeCvtsi2sd: 360 legPrefix, opcode = legacyPrefixes0xF2, 0x0f2a 361 default: 362 panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) 363 } 364 365 var rex rexInfo 366 if i.b1 { 367 rex = rex.setW() 368 } else { 369 rex = rex.clearW() 370 } 371 dst := regEncodings[i.op2.reg().RealReg()] 372 373 op1 := i.op1 374 if op1.kind == operandKindReg { 375 src := regEncodings[op1.reg().RealReg()] 376 encodeRegReg(c, legPrefix, opcode, opcodeNum, dst, src, rex) 377 } else if i.op1.kind == operandKindMem { 378 m := i.op1.addressMode() 379 encodeRegMem(c, legPrefix, opcode, opcodeNum, dst, m, rex) 380 } else { 381 panic("BUG: invalid operand kind") 382 } 383 384 case xmmUnaryRmR: 385 var prefix legacyPrefixes 386 var opcode uint32 387 var opcodeNum uint32 388 op := sseOpcode(i.u1) 389 switch op { 390 case sseOpcodeCvtss2sd: 391 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5A, 2 392 case sseOpcodeCvtsd2ss: 393 prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5A, 2 394 case sseOpcodeMovaps: 395 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F28, 2 396 case sseOpcodeMovapd: 397 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F28, 2 398 case sseOpcodeMovdqa: 399 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F6F, 2 400 case sseOpcodeMovdqu: 401 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F6F, 2 402 case sseOpcodeMovsd: 403 prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F10, 2 404 case sseOpcodeMovss: 405 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F10, 2 406 case sseOpcodeMovups: 407 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F10, 2 408 case sseOpcodeMovupd: 409 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F10, 2 410 case sseOpcodePabsb: 411 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381C, 3 412 case sseOpcodePabsw: 413 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381D, 3 414 case sseOpcodePabsd: 415 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381E, 3 416 case sseOpcodePmovsxbd: 417 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3821, 3 418 case sseOpcodePmovsxbw: 419 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3820, 3 420 case sseOpcodePmovsxbq: 421 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3822, 3 422 case sseOpcodePmovsxwd: 423 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3823, 3 424 case sseOpcodePmovsxwq: 425 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3824, 3 426 case sseOpcodePmovsxdq: 427 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3825, 3 428 case sseOpcodePmovzxbd: 429 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3831, 3 430 case sseOpcodePmovzxbw: 431 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3830, 3 432 case sseOpcodePmovzxbq: 433 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3832, 3 434 case sseOpcodePmovzxwd: 435 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3833, 3 436 case sseOpcodePmovzxwq: 437 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3834, 3 438 case sseOpcodePmovzxdq: 439 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3835, 3 440 case sseOpcodeSqrtps: 441 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F51, 2 442 case sseOpcodeSqrtpd: 443 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F51, 2 444 case sseOpcodeSqrtss: 445 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F51, 2 446 case sseOpcodeSqrtsd: 447 prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F51, 2 448 case sseOpcodeXorps: 449 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F57, 2 450 case sseOpcodeXorpd: 451 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F57, 2 452 case sseOpcodeCvtdq2ps: 453 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F5B, 2 454 case sseOpcodeCvtdq2pd: 455 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0FE6, 2 456 case sseOpcodeCvtps2pd: 457 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F5A, 2 458 case sseOpcodeCvtpd2ps: 459 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5A, 2 460 case sseOpcodeCvttps2dq: 461 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5B, 2 462 case sseOpcodeCvttpd2dq: 463 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE6, 2 464 default: 465 panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) 466 } 467 468 dst := regEncodings[i.op2.reg().RealReg()] 469 470 rex := rexInfo(0).clearW() 471 op1 := i.op1 472 if op1.kind == operandKindReg { 473 src := regEncodings[op1.reg().RealReg()] 474 encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) 475 } else if i.op1.kind == operandKindMem { 476 m := i.op1.addressMode() 477 needsLabelResolution = encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) 478 } else { 479 panic("BUG: invalid operand kind") 480 } 481 482 case xmmUnaryRmRImm: 483 var prefix legacyPrefixes 484 var opcode uint32 485 var opcodeNum uint32 486 op := sseOpcode(i.u1) 487 switch op { 488 case sseOpcodeRoundps: 489 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a08, 3 490 case sseOpcodeRoundss: 491 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a0a, 3 492 case sseOpcodeRoundpd: 493 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a09, 3 494 case sseOpcodeRoundsd: 495 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a0b, 3 496 } 497 rex := rexInfo(0).clearW() 498 dst := regEncodings[i.op2.reg().RealReg()] 499 op1 := i.op1 500 if op1.kind == operandKindReg { 501 src := regEncodings[op1.reg().RealReg()] 502 encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) 503 } else if i.op1.kind == operandKindMem { 504 m := i.op1.addressMode() 505 encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) 506 } else { 507 panic("BUG: invalid operand kind") 508 } 509 510 c.EmitByte(byte(i.u2)) 511 512 case unaryRmR: 513 var prefix legacyPrefixes 514 var opcode uint32 515 var opcodeNum uint32 516 op := unaryRmROpcode(i.u1) 517 // We assume size is either 32 or 64. 518 switch op { 519 case unaryRmROpcodeBsr: 520 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0fbd, 2 521 case unaryRmROpcodeBsf: 522 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0fbc, 2 523 case unaryRmROpcodeLzcnt: 524 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fbd, 2 525 case unaryRmROpcodeTzcnt: 526 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fbc, 2 527 case unaryRmROpcodePopcnt: 528 prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fb8, 2 529 default: 530 panic(fmt.Sprintf("Unsupported unaryRmROpcode: %s", op)) 531 } 532 533 dst := regEncodings[i.op2.reg().RealReg()] 534 535 rex := rexInfo(0) 536 if i.b1 { // 64 bit. 537 rex = rexInfo(0).setW() 538 } else { 539 rex = rexInfo(0).clearW() 540 } 541 op1 := i.op1 542 if op1.kind == operandKindReg { 543 src := regEncodings[op1.reg().RealReg()] 544 encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) 545 } else if i.op1.kind == operandKindMem { 546 m := i.op1.addressMode() 547 encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) 548 } else { 549 panic("BUG: invalid operand kind") 550 } 551 552 case not: 553 var prefix legacyPrefixes 554 src := regEncodings[i.op1.reg().RealReg()] 555 rex := rexInfo(0) 556 if i.b1 { // 64 bit. 557 rex = rexInfo(0).setW() 558 } else { 559 rex = rexInfo(0).clearW() 560 } 561 subopcode := uint8(2) 562 encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) 563 564 case neg: 565 var prefix legacyPrefixes 566 src := regEncodings[i.op1.reg().RealReg()] 567 rex := rexInfo(0) 568 if i.b1 { // 64 bit. 569 rex = rexInfo(0).setW() 570 } else { 571 rex = rexInfo(0).clearW() 572 } 573 subopcode := uint8(3) 574 encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) 575 576 case div: 577 rex := rexInfo(0) 578 if i.b1 { // 64 bit. 579 rex = rexInfo(0).setW() 580 } else { 581 rex = rexInfo(0).clearW() 582 } 583 var subopcode uint8 584 if i.u1 != 0 { // Signed. 585 subopcode = 7 586 } else { 587 subopcode = 6 588 } 589 590 divisor := i.op1 591 if divisor.kind == operandKindReg { 592 src := regEncodings[divisor.reg().RealReg()] 593 encodeEncEnc(c, legacyPrefixesNone, 0xf7, 1, subopcode, uint8(src), rex) 594 } else if divisor.kind == operandKindMem { 595 m := divisor.addressMode() 596 encodeEncMem(c, legacyPrefixesNone, 0xf7, 1, subopcode, m, rex) 597 } else { 598 panic("BUG: invalid operand kind") 599 } 600 601 case mulHi: 602 var prefix legacyPrefixes 603 rex := rexInfo(0) 604 if i.b1 { // 64 bit. 605 rex = rexInfo(0).setW() 606 } else { 607 rex = rexInfo(0).clearW() 608 } 609 610 signed := i.u1 != 0 611 var subopcode uint8 612 if signed { 613 subopcode = 5 614 } else { 615 subopcode = 4 616 } 617 618 // src1 is implicitly rax, 619 // dst_lo is implicitly rax, 620 // dst_hi is implicitly rdx. 621 src2 := i.op1 622 if src2.kind == operandKindReg { 623 src := regEncodings[src2.reg().RealReg()] 624 encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) 625 } else if src2.kind == operandKindMem { 626 m := src2.addressMode() 627 encodeEncMem(c, prefix, 0xf7, 1, subopcode, m, rex) 628 } else { 629 panic("BUG: invalid operand kind") 630 } 631 632 case signExtendData: 633 if i.b1 { // 64 bit. 634 c.EmitByte(0x48) 635 c.EmitByte(0x99) 636 } else { 637 c.EmitByte(0x99) 638 } 639 case movzxRmR, movsxRmR: 640 signed := i.kind == movsxRmR 641 642 ext := extMode(i.u1) 643 var opcode uint32 644 var opcodeNum uint32 645 var rex rexInfo 646 switch ext { 647 case extModeBL: 648 if signed { 649 opcode, opcodeNum, rex = 0x0fbe, 2, rex.clearW() 650 } else { 651 opcode, opcodeNum, rex = 0x0fb6, 2, rex.clearW() 652 } 653 case extModeBQ: 654 if signed { 655 opcode, opcodeNum, rex = 0x0fbe, 2, rex.setW() 656 } else { 657 opcode, opcodeNum, rex = 0x0fb6, 2, rex.setW() 658 } 659 case extModeWL: 660 if signed { 661 opcode, opcodeNum, rex = 0x0fbf, 2, rex.clearW() 662 } else { 663 opcode, opcodeNum, rex = 0x0fb7, 2, rex.clearW() 664 } 665 case extModeWQ: 666 if signed { 667 opcode, opcodeNum, rex = 0x0fbf, 2, rex.setW() 668 } else { 669 opcode, opcodeNum, rex = 0x0fb7, 2, rex.setW() 670 } 671 case extModeLQ: 672 if signed { 673 opcode, opcodeNum, rex = 0x63, 1, rex.setW() 674 } else { 675 opcode, opcodeNum, rex = 0x8b, 1, rex.clearW() 676 } 677 default: 678 panic("BUG: invalid extMode") 679 } 680 681 op := i.op1 682 dst := regEncodings[i.op2.reg().RealReg()] 683 switch op.kind { 684 case operandKindReg: 685 src := regEncodings[op.reg().RealReg()] 686 if ext == extModeBL || ext == extModeBQ { 687 // Some destinations must be encoded with REX.R = 1. 688 if e := src.encoding(); e >= 4 && e <= 7 { 689 rex = rex.always() 690 } 691 } 692 encodeRegReg(c, legacyPrefixesNone, opcode, opcodeNum, dst, src, rex) 693 case operandKindMem: 694 m := op.addressMode() 695 encodeRegMem(c, legacyPrefixesNone, opcode, opcodeNum, dst, m, rex) 696 default: 697 panic("BUG: invalid operand kind") 698 } 699 700 case mov64MR: 701 m := i.op1.addressMode() 702 encodeLoad64(c, m, i.op2.reg().RealReg()) 703 704 case lea: 705 needsLabelResolution = true 706 dst := regEncodings[i.op2.reg().RealReg()] 707 rex := rexInfo(0).setW() 708 const opcode, opcodeNum = 0x8d, 1 709 switch i.op1.kind { 710 case operandKindMem: 711 a := i.op1.addressMode() 712 encodeRegMem(c, legacyPrefixesNone, opcode, opcodeNum, dst, a, rex) 713 case operandKindLabel: 714 rex.encode(c, regRexBit(byte(dst)), 0) 715 c.EmitByte(byte((opcode) & 0xff)) 716 717 // Indicate "LEAQ [RIP + 32bit displacement]. 718 // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing 719 c.EmitByte(encodeModRM(0b00, dst.encoding(), 0b101)) 720 721 // This will be resolved later, so we just emit a placeholder (0xffffffff for testing). 722 c.Emit4Bytes(0xffffffff) 723 default: 724 panic("BUG: invalid operand kind") 725 } 726 727 case movRM: 728 m := i.op2.addressMode() 729 src := regEncodings[i.op1.reg().RealReg()] 730 731 var rex rexInfo 732 switch i.u1 { 733 case 1: 734 if e := src.encoding(); e >= 4 && e <= 7 { 735 rex = rex.always() 736 } 737 encodeRegMem(c, legacyPrefixesNone, 0x88, 1, src, m, rex.clearW()) 738 case 2: 739 encodeRegMem(c, legacyPrefixes0x66, 0x89, 1, src, m, rex.clearW()) 740 case 4: 741 encodeRegMem(c, legacyPrefixesNone, 0x89, 1, src, m, rex.clearW()) 742 case 8: 743 encodeRegMem(c, legacyPrefixesNone, 0x89, 1, src, m, rex.setW()) 744 default: 745 panic(fmt.Sprintf("BUG: invalid size %d: %s", i.u1, i.String())) 746 } 747 748 case shiftR: 749 src := regEncodings[i.op2.reg().RealReg()] 750 amount := i.op1 751 752 var opcode uint32 753 var prefix legacyPrefixes 754 rex := rexInfo(0) 755 if i.b1 { // 64 bit. 756 rex = rexInfo(0).setW() 757 } else { 758 rex = rexInfo(0).clearW() 759 } 760 761 switch amount.kind { 762 case operandKindReg: 763 if amount.reg() != rcxVReg { 764 panic("BUG: invalid reg operand: must be rcx") 765 } 766 opcode, prefix = 0xd3, legacyPrefixesNone 767 encodeEncEnc(c, prefix, opcode, 1, uint8(i.u1), uint8(src), rex) 768 case operandKindImm32: 769 opcode, prefix = 0xc1, legacyPrefixesNone 770 encodeEncEnc(c, prefix, opcode, 1, uint8(i.u1), uint8(src), rex) 771 c.EmitByte(byte(amount.imm32())) 772 default: 773 panic("BUG: invalid operand kind") 774 } 775 case xmmRmiReg: 776 const legPrefix = legacyPrefixes0x66 777 rex := rexInfo(0).clearW() 778 dst := regEncodings[i.op2.reg().RealReg()] 779 780 var opcode uint32 781 var regDigit uint8 782 783 op := sseOpcode(i.u1) 784 op1 := i.op1 785 if i.op1.kind == operandKindImm32 { 786 switch op { 787 case sseOpcodePsllw: 788 opcode, regDigit = 0x0f71, 6 789 case sseOpcodePslld: 790 opcode, regDigit = 0x0f72, 6 791 case sseOpcodePsllq: 792 opcode, regDigit = 0x0f73, 6 793 case sseOpcodePsraw: 794 opcode, regDigit = 0x0f71, 4 795 case sseOpcodePsrad: 796 opcode, regDigit = 0x0f72, 4 797 case sseOpcodePsrlw: 798 opcode, regDigit = 0x0f71, 2 799 case sseOpcodePsrld: 800 opcode, regDigit = 0x0f72, 2 801 case sseOpcodePsrlq: 802 opcode, regDigit = 0x0f73, 2 803 default: 804 panic("invalid opcode") 805 } 806 807 encodeEncEnc(c, legPrefix, opcode, 2, regDigit, uint8(dst), rex) 808 imm32 := op1.imm32() 809 if imm32 > 0xff&imm32 { 810 panic("immediate value does not fit 1 byte") 811 } 812 c.EmitByte(uint8(imm32)) 813 } else { 814 switch op { 815 case sseOpcodePsllw: 816 opcode = 0x0ff1 817 case sseOpcodePslld: 818 opcode = 0x0ff2 819 case sseOpcodePsllq: 820 opcode = 0x0ff3 821 case sseOpcodePsraw: 822 opcode = 0x0fe1 823 case sseOpcodePsrad: 824 opcode = 0x0fe2 825 case sseOpcodePsrlw: 826 opcode = 0x0fd1 827 case sseOpcodePsrld: 828 opcode = 0x0fd2 829 case sseOpcodePsrlq: 830 opcode = 0x0fd3 831 default: 832 panic("invalid opcode") 833 } 834 835 if op1.kind == operandKindReg { 836 reg := regEncodings[op1.reg().RealReg()] 837 encodeRegReg(c, legPrefix, opcode, 2, dst, reg, rex) 838 } else if op1.kind == operandKindMem { 839 m := op1.addressMode() 840 encodeRegMem(c, legPrefix, opcode, 2, dst, m, rex) 841 } else { 842 panic("BUG: invalid operand kind") 843 } 844 } 845 846 case cmpRmiR: 847 var opcode uint32 848 isCmp := i.u1 != 0 849 rex := rexInfo(0) 850 _64 := i.b1 851 if _64 { // 64 bit. 852 rex = rex.setW() 853 } else { 854 rex = rex.clearW() 855 } 856 dst := regEncodings[i.op2.reg().RealReg()] 857 op1 := i.op1 858 switch op1.kind { 859 case operandKindReg: 860 reg := regEncodings[op1.reg().RealReg()] 861 if isCmp { 862 opcode = 0x39 863 } else { 864 opcode = 0x85 865 } 866 // Here we swap the encoding of the operands for CMP to be consistent with the output of LLVM/GCC. 867 encodeRegReg(c, legacyPrefixesNone, opcode, 1, reg, dst, rex) 868 869 case operandKindMem: 870 if isCmp { 871 opcode = 0x3b 872 } else { 873 opcode = 0x85 874 } 875 m := op1.addressMode() 876 encodeRegMem(c, legacyPrefixesNone, opcode, 1, dst, m, rex) 877 878 case operandKindImm32: 879 imm32 := op1.imm32() 880 useImm8 := isCmp && lower8willSignExtendTo32(imm32) 881 var subopcode uint8 882 883 switch { 884 case isCmp && useImm8: 885 opcode, subopcode = 0x83, 7 886 case isCmp && !useImm8: 887 opcode, subopcode = 0x81, 7 888 default: 889 opcode, subopcode = 0xf7, 0 890 } 891 encodeEncEnc(c, legacyPrefixesNone, opcode, 1, subopcode, uint8(dst), rex) 892 if useImm8 { 893 c.EmitByte(uint8(imm32)) 894 } else { 895 c.Emit4Bytes(imm32) 896 } 897 898 default: 899 panic("BUG: invalid operand kind") 900 } 901 case setcc: 902 cc := cond(i.u1) 903 dst := regEncodings[i.op2.reg().RealReg()] 904 rex := rexInfo(0).clearW().always() 905 opcode := uint32(0x0f90) + uint32(cc) 906 encodeEncEnc(c, legacyPrefixesNone, opcode, 2, 0, uint8(dst), rex) 907 case cmove: 908 cc := cond(i.u1) 909 dst := regEncodings[i.op2.reg().RealReg()] 910 rex := rexInfo(0) 911 if i.b1 { // 64 bit. 912 rex = rex.setW() 913 } else { 914 rex = rex.clearW() 915 } 916 opcode := uint32(0x0f40) + uint32(cc) 917 src := i.op1 918 switch src.kind { 919 case operandKindReg: 920 srcReg := regEncodings[src.reg().RealReg()] 921 encodeRegReg(c, legacyPrefixesNone, opcode, 2, dst, srcReg, rex) 922 case operandKindMem: 923 m := src.addressMode() 924 encodeRegMem(c, legacyPrefixesNone, opcode, 2, dst, m, rex) 925 default: 926 panic("BUG: invalid operand kind") 927 } 928 case push64: 929 op := i.op1 930 931 switch op.kind { 932 case operandKindReg: 933 dst := regEncodings[op.reg().RealReg()] 934 if dst.rexBit() > 0 { 935 c.EmitByte(rexEncodingDefault | 0x1) 936 } 937 c.EmitByte(0x50 | dst.encoding()) 938 case operandKindMem: 939 m := op.addressMode() 940 encodeRegMem( 941 c, legacyPrefixesNone, 0xff, 1, regEnc(6), m, rexInfo(0).clearW(), 942 ) 943 case operandKindImm32: 944 c.EmitByte(0x68) 945 c.Emit4Bytes(op.imm32()) 946 default: 947 panic("BUG: invalid operand kind") 948 } 949 950 case pop64: 951 dst := regEncodings[i.op1.reg().RealReg()] 952 if dst.rexBit() > 0 { 953 c.EmitByte(rexEncodingDefault | 0x1) 954 } 955 c.EmitByte(0x58 | dst.encoding()) 956 957 case xmmMovRM: 958 var legPrefix legacyPrefixes 959 var opcode uint32 960 const opcodeNum = 2 961 switch sseOpcode(i.u1) { 962 case sseOpcodeMovaps: 963 legPrefix, opcode = legacyPrefixesNone, 0x0f29 964 case sseOpcodeMovapd: 965 legPrefix, opcode = legacyPrefixes0x66, 0x0f29 966 case sseOpcodeMovdqa: 967 legPrefix, opcode = legacyPrefixes0x66, 0x0f7f 968 case sseOpcodeMovdqu: 969 legPrefix, opcode = legacyPrefixes0xF3, 0x0f7f 970 case sseOpcodeMovss: 971 legPrefix, opcode = legacyPrefixes0xF3, 0x0f11 972 case sseOpcodeMovsd: 973 legPrefix, opcode = legacyPrefixes0xF2, 0x0f11 974 case sseOpcodeMovups: 975 legPrefix, opcode = legacyPrefixesNone, 0x0f11 976 case sseOpcodeMovupd: 977 legPrefix, opcode = legacyPrefixes0x66, 0x0f11 978 default: 979 panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) 980 } 981 982 dst := regEncodings[i.op1.reg().RealReg()] 983 encodeRegMem(c, legPrefix, opcode, opcodeNum, dst, i.op2.addressMode(), rexInfo(0).clearW()) 984 case xmmLoadConst: 985 panic("TODO") 986 case xmmToGpr: 987 var legPrefix legacyPrefixes 988 var opcode uint32 989 var argSwap bool 990 const opcodeNum = 2 991 switch sseOpcode(i.u1) { 992 case sseOpcodeMovd, sseOpcodeMovq: 993 legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0f7e, false 994 case sseOpcodeMovmskps: 995 legPrefix, opcode, argSwap = legacyPrefixesNone, 0x0f50, true 996 case sseOpcodeMovmskpd: 997 legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0f50, true 998 case sseOpcodePmovmskb: 999 legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0fd7, true 1000 case sseOpcodeCvttss2si: 1001 legPrefix, opcode, argSwap = legacyPrefixes0xF3, 0x0f2c, true 1002 case sseOpcodeCvttsd2si: 1003 legPrefix, opcode, argSwap = legacyPrefixes0xF2, 0x0f2c, true 1004 default: 1005 panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) 1006 } 1007 1008 var rex rexInfo 1009 if i.b1 { 1010 rex = rex.setW() 1011 } else { 1012 rex = rex.clearW() 1013 } 1014 src := regEncodings[i.op1.reg().RealReg()] 1015 dst := regEncodings[i.op2.reg().RealReg()] 1016 if argSwap { 1017 src, dst = dst, src 1018 } 1019 encodeRegReg(c, legPrefix, opcode, opcodeNum, src, dst, rex) 1020 1021 case cvtUint64ToFloatSeq: 1022 panic("TODO") 1023 case cvtFloatToSintSeq: 1024 panic("TODO") 1025 case cvtFloatToUintSeq: 1026 panic("TODO") 1027 case xmmMinMaxSeq: 1028 panic("TODO") 1029 case xmmCmpRmR: 1030 var prefix legacyPrefixes 1031 var opcode uint32 1032 var opcodeNum uint32 1033 rex := rexInfo(0) 1034 _64 := i.b1 1035 if _64 { // 64 bit. 1036 rex = rex.setW() 1037 } else { 1038 rex = rex.clearW() 1039 } 1040 1041 op := sseOpcode(i.u1) 1042 switch op { 1043 case sseOpcodePtest: 1044 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3817, 3 1045 case sseOpcodeUcomisd: 1046 prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f2e, 2 1047 case sseOpcodeUcomiss: 1048 prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0f2e, 2 1049 default: 1050 panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) 1051 } 1052 1053 dst := regEncodings[i.op2.reg().RealReg()] 1054 op1 := i.op1 1055 switch op1.kind { 1056 case operandKindReg: 1057 reg := regEncodings[op1.reg().RealReg()] 1058 encodeRegReg(c, prefix, opcode, opcodeNum, dst, reg, rex) 1059 1060 case operandKindMem: 1061 m := op1.addressMode() 1062 encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) 1063 1064 default: 1065 panic("BUG: invalid operand kind") 1066 } 1067 case xmmRmRImm: 1068 op := sseOpcode(i.u1) 1069 var legPrex legacyPrefixes 1070 var opcode uint32 1071 var opcodeNum uint32 1072 var swap bool 1073 switch op { 1074 case sseOpcodeCmpps: 1075 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0FC2, 2 1076 case sseOpcodeCmppd: 1077 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC2, 2 1078 case sseOpcodeCmpss: 1079 legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0FC2, 2 1080 case sseOpcodeCmpsd: 1081 legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0FC2, 2 1082 case sseOpcodeInsertps: 1083 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A21, 3 1084 case sseOpcodePalignr: 1085 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A0F, 3 1086 case sseOpcodePinsrb: 1087 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A20, 3 1088 case sseOpcodePinsrw: 1089 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC4, 2 1090 case sseOpcodePinsrd, sseOpcodePinsrq: 1091 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A22, 3 1092 case sseOpcodePextrb: 1093 swap = true 1094 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A14, 3 1095 case sseOpcodePextrw: 1096 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC5, 2 1097 case sseOpcodePextrd, sseOpcodePextrq: 1098 swap = true 1099 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A16, 3 1100 case sseOpcodePshufd: 1101 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F70, 2 1102 case sseOpcodeRoundps: 1103 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A08, 3 1104 case sseOpcodeRoundpd: 1105 legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A09, 3 1106 case sseOpcodeShufps: 1107 legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0FC6, 2 1108 default: 1109 panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) 1110 } 1111 1112 dst := regEncodings[i.op2.reg().RealReg()] 1113 1114 var rex rexInfo 1115 if op == sseOpcodePextrq || op == sseOpcodePinsrq { 1116 rex = rexInfo(0).setW() 1117 } else { 1118 rex = rexInfo(0).clearW() 1119 } 1120 op1 := i.op1 1121 if op1.kind == operandKindReg { 1122 src := regEncodings[op1.reg().RealReg()] 1123 if swap { 1124 src, dst = dst, src 1125 } 1126 encodeRegReg(c, legPrex, opcode, opcodeNum, dst, src, rex) 1127 } else if i.op1.kind == operandKindMem { 1128 if swap { 1129 panic("BUG: this is not possible to encode") 1130 } 1131 m := i.op1.addressMode() 1132 encodeRegMem(c, legPrex, opcode, opcodeNum, dst, m, rex) 1133 } else { 1134 panic("BUG: invalid operand kind") 1135 } 1136 1137 c.EmitByte(byte(i.u2)) 1138 1139 case jmp: 1140 const ( 1141 regMemOpcode = 0xff 1142 regMemOpcodeNum = 1 1143 regMemSubOpcode = 4 1144 ) 1145 op := i.op1 1146 switch op.kind { 1147 case operandKindLabel: 1148 needsLabelResolution = true 1149 fallthrough 1150 case operandKindImm32: 1151 c.EmitByte(0xe9) 1152 c.Emit4Bytes(op.imm32()) 1153 case operandKindMem: 1154 m := op.addressMode() 1155 encodeRegMem(c, 1156 legacyPrefixesNone, 1157 regMemOpcode, regMemOpcodeNum, 1158 regMemSubOpcode, m, rexInfo(0).clearW(), 1159 ) 1160 case operandKindReg: 1161 r := op.reg().RealReg() 1162 encodeRegReg( 1163 c, 1164 legacyPrefixesNone, 1165 regMemOpcode, regMemOpcodeNum, 1166 regMemSubOpcode, 1167 regEncodings[r], rexInfo(0).clearW(), 1168 ) 1169 default: 1170 panic("BUG: invalid operand kind") 1171 } 1172 1173 case jmpIf: 1174 op := i.op1 1175 switch op.kind { 1176 case operandKindLabel: 1177 needsLabelResolution = true 1178 fallthrough 1179 case operandKindImm32: 1180 c.EmitByte(0x0f) 1181 c.EmitByte(0x80 | cond(i.u1).encoding()) 1182 c.Emit4Bytes(op.imm32()) 1183 default: 1184 panic("BUG: invalid operand kind") 1185 } 1186 1187 case jmpTableIsland: 1188 needsLabelResolution = true 1189 for tc := uint64(0); tc < i.u2; tc++ { 1190 c.Emit8Bytes(0) 1191 } 1192 1193 case exitSequence: 1194 execCtx := i.op1.reg() 1195 allocatedAmode := i.op2.addressMode() 1196 1197 // Restore the RBP, RSP, and return to the Go code: 1198 *allocatedAmode = amode{ 1199 kindWithShift: uint32(amodeImmReg), base: execCtx, 1200 imm32: wazevoapi.ExecutionContextOffsetOriginalFramePointer.U32(), 1201 } 1202 encodeLoad64(c, allocatedAmode, rbp) 1203 allocatedAmode.imm32 = wazevoapi.ExecutionContextOffsetOriginalStackPointer.U32() 1204 encodeLoad64(c, allocatedAmode, rsp) 1205 encodeRet(c) 1206 1207 case ud2: 1208 c.EmitByte(0x0f) 1209 c.EmitByte(0x0b) 1210 1211 case call: 1212 c.EmitByte(0xe8) 1213 // Meaning that the call target is a function value, and requires relocation. 1214 c.AddRelocationInfo(ssa.FuncRef(i.u1)) 1215 // Note that this is zero as a placeholder for the call target if it's a function value. 1216 c.Emit4Bytes(uint32(i.u2)) 1217 1218 case callIndirect: 1219 op := i.op1 1220 1221 const opcodeNum = 1 1222 const opcode = 0xff 1223 rex := rexInfo(0).clearW() 1224 switch op.kind { 1225 case operandKindReg: 1226 dst := regEncodings[op.reg().RealReg()] 1227 encodeRegReg(c, 1228 legacyPrefixesNone, 1229 opcode, opcodeNum, 1230 regEnc(2), 1231 dst, 1232 rex, 1233 ) 1234 case operandKindMem: 1235 m := op.addressMode() 1236 encodeRegMem(c, 1237 legacyPrefixesNone, 1238 opcode, opcodeNum, 1239 regEnc(2), 1240 m, 1241 rex, 1242 ) 1243 default: 1244 panic("BUG: invalid operand kind") 1245 } 1246 1247 case xchg: 1248 src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 1249 size := i.u1 1250 1251 var rex rexInfo 1252 var opcode uint32 1253 lp := legacyPrefixesNone 1254 switch size { 1255 case 8: 1256 opcode = 0x87 1257 rex = rexInfo(0).setW() 1258 case 4: 1259 opcode = 0x87 1260 rex = rexInfo(0).clearW() 1261 case 2: 1262 lp = legacyPrefixes0x66 1263 opcode = 0x87 1264 rex = rexInfo(0).clearW() 1265 case 1: 1266 opcode = 0x86 1267 if i.op2.kind == operandKindReg { 1268 panic("TODO?: xchg on two 1-byte registers") 1269 } 1270 // Some destinations must be encoded with REX.R = 1. 1271 if e := src.encoding(); e >= 4 && e <= 7 { 1272 rex = rexInfo(0).always() 1273 } 1274 default: 1275 panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) 1276 } 1277 1278 switch dst.kind { 1279 case operandKindMem: 1280 m := dst.addressMode() 1281 encodeRegMem(c, lp, opcode, 1, src, m, rex) 1282 case operandKindReg: 1283 r := dst.reg().RealReg() 1284 encodeRegReg(c, lp, opcode, 1, src, regEncodings[r], rex) 1285 default: 1286 panic("BUG: invalid operand kind") 1287 } 1288 1289 case lockcmpxchg: 1290 src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 1291 size := i.u1 1292 1293 var rex rexInfo 1294 var opcode uint32 1295 lp := legacyPrefixes0xF0 // Lock prefix. 1296 switch size { 1297 case 8: 1298 opcode = 0x0FB1 1299 rex = rexInfo(0).setW() 1300 case 4: 1301 opcode = 0x0FB1 1302 rex = rexInfo(0).clearW() 1303 case 2: 1304 lp = legacyPrefixes0x660xF0 // Legacy prefix + Lock prefix. 1305 opcode = 0x0FB1 1306 rex = rexInfo(0).clearW() 1307 case 1: 1308 opcode = 0x0FB0 1309 // Some destinations must be encoded with REX.R = 1. 1310 if e := src.encoding(); e >= 4 && e <= 7 { 1311 rex = rexInfo(0).always() 1312 } 1313 default: 1314 panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) 1315 } 1316 1317 switch dst.kind { 1318 case operandKindMem: 1319 m := dst.addressMode() 1320 encodeRegMem(c, lp, opcode, 2, src, m, rex) 1321 default: 1322 panic("BUG: invalid operand kind") 1323 } 1324 1325 case lockxadd: 1326 src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 1327 size := i.u1 1328 1329 var rex rexInfo 1330 var opcode uint32 1331 lp := legacyPrefixes0xF0 // Lock prefix. 1332 switch size { 1333 case 8: 1334 opcode = 0x0FC1 1335 rex = rexInfo(0).setW() 1336 case 4: 1337 opcode = 0x0FC1 1338 rex = rexInfo(0).clearW() 1339 case 2: 1340 lp = legacyPrefixes0x660xF0 // Legacy prefix + Lock prefix. 1341 opcode = 0x0FC1 1342 rex = rexInfo(0).clearW() 1343 case 1: 1344 opcode = 0x0FC0 1345 // Some destinations must be encoded with REX.R = 1. 1346 if e := src.encoding(); e >= 4 && e <= 7 { 1347 rex = rexInfo(0).always() 1348 } 1349 default: 1350 panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) 1351 } 1352 1353 switch dst.kind { 1354 case operandKindMem: 1355 m := dst.addressMode() 1356 encodeRegMem(c, lp, opcode, 2, src, m, rex) 1357 default: 1358 panic("BUG: invalid operand kind") 1359 } 1360 1361 case zeros: 1362 r := i.op2.reg() 1363 if r.RegType() == regalloc.RegTypeInt { 1364 i.asAluRmiR(aluRmiROpcodeXor, newOperandReg(r), r, true) 1365 } else { 1366 i.asXmmRmR(sseOpcodePxor, newOperandReg(r), r) 1367 } 1368 i.encode(c) 1369 1370 case mfence: 1371 // https://www.felixcloutier.com/x86/mfence 1372 c.EmitByte(0x0f) 1373 c.EmitByte(0xae) 1374 c.EmitByte(0xf0) 1375 1376 default: 1377 panic(fmt.Sprintf("TODO: %v", i.kind)) 1378 } 1379 return 1380 } 1381 1382 func encodeLoad64(c backend.Compiler, m *amode, rd regalloc.RealReg) { 1383 dst := regEncodings[rd] 1384 encodeRegMem(c, legacyPrefixesNone, 0x8b, 1, dst, m, rexInfo(0).setW()) 1385 } 1386 1387 func encodeRet(c backend.Compiler) { 1388 c.EmitByte(0xc3) 1389 } 1390 1391 func encodeEncEnc( 1392 c backend.Compiler, 1393 legPrefixes legacyPrefixes, 1394 opcodes uint32, 1395 opcodeNum uint32, 1396 r uint8, 1397 rm uint8, 1398 rex rexInfo, 1399 ) { 1400 legPrefixes.encode(c) 1401 rex.encode(c, r>>3, rm>>3) 1402 1403 for opcodeNum > 0 { 1404 opcodeNum-- 1405 c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) 1406 } 1407 c.EmitByte(encodeModRM(3, r&7, rm&7)) 1408 } 1409 1410 func encodeRegReg( 1411 c backend.Compiler, 1412 legPrefixes legacyPrefixes, 1413 opcodes uint32, 1414 opcodeNum uint32, 1415 r regEnc, 1416 rm regEnc, 1417 rex rexInfo, 1418 ) { 1419 encodeEncEnc(c, legPrefixes, opcodes, opcodeNum, uint8(r), uint8(rm), rex) 1420 } 1421 1422 func encodeModRM(mod byte, reg byte, rm byte) byte { 1423 return mod<<6 | reg<<3 | rm 1424 } 1425 1426 func encodeSIB(shift byte, encIndex byte, encBase byte) byte { 1427 return shift<<6 | encIndex<<3 | encBase 1428 } 1429 1430 func encodeRegMem( 1431 c backend.Compiler, legPrefixes legacyPrefixes, opcodes uint32, opcodeNum uint32, r regEnc, m *amode, rex rexInfo, 1432 ) (needsLabelResolution bool) { 1433 needsLabelResolution = encodeEncMem(c, legPrefixes, opcodes, opcodeNum, uint8(r), m, rex) 1434 return 1435 } 1436 1437 func encodeEncMem( 1438 c backend.Compiler, legPrefixes legacyPrefixes, opcodes uint32, opcodeNum uint32, r uint8, m *amode, rex rexInfo, 1439 ) (needsLabelResolution bool) { 1440 legPrefixes.encode(c) 1441 1442 const ( 1443 modNoDisplacement = 0b00 1444 modShortDisplacement = 0b01 1445 modLongDisplacement = 0b10 1446 1447 useSBI = 4 // the encoding of rsp or r12 register. 1448 ) 1449 1450 switch m.kind() { 1451 case amodeImmReg, amodeImmRBP: 1452 base := m.base.RealReg() 1453 baseEnc := regEncodings[base] 1454 1455 rex.encode(c, regRexBit(r), baseEnc.rexBit()) 1456 1457 for opcodeNum > 0 { 1458 opcodeNum-- 1459 c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) 1460 } 1461 1462 // SIB byte is the last byte of the memory encoding before the displacement 1463 const sibByte = 0x24 // == encodeSIB(0, 4, 4) 1464 1465 immZero, baseRbp, baseR13 := m.imm32 == 0, base == rbp, base == r13 1466 short := lower8willSignExtendTo32(m.imm32) 1467 rspOrR12 := base == rsp || base == r12 1468 1469 if immZero && !baseRbp && !baseR13 { // rbp or r13 can't be used as base for without displacement encoding. 1470 c.EmitByte(encodeModRM(modNoDisplacement, regEncoding(r), baseEnc.encoding())) 1471 if rspOrR12 { 1472 c.EmitByte(sibByte) 1473 } 1474 } else if short { // Note: this includes the case where m.imm32 == 0 && base == rbp || base == r13. 1475 c.EmitByte(encodeModRM(modShortDisplacement, regEncoding(r), baseEnc.encoding())) 1476 if rspOrR12 { 1477 c.EmitByte(sibByte) 1478 } 1479 c.EmitByte(byte(m.imm32)) 1480 } else { 1481 c.EmitByte(encodeModRM(modLongDisplacement, regEncoding(r), baseEnc.encoding())) 1482 if rspOrR12 { 1483 c.EmitByte(sibByte) 1484 } 1485 c.Emit4Bytes(m.imm32) 1486 } 1487 1488 case amodeRegRegShift: 1489 base := m.base.RealReg() 1490 baseEnc := regEncodings[base] 1491 index := m.index.RealReg() 1492 indexEnc := regEncodings[index] 1493 1494 if index == rsp { 1495 panic("BUG: rsp can't be used as index of addressing mode") 1496 } 1497 1498 rex.encodeForIndex(c, regEnc(r), indexEnc, baseEnc) 1499 1500 for opcodeNum > 0 { 1501 opcodeNum-- 1502 c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) 1503 } 1504 1505 immZero, baseRbp, baseR13 := m.imm32 == 0, base == rbp, base == r13 1506 if immZero && !baseRbp && !baseR13 { // rbp or r13 can't be used as base for without displacement encoding. (curious why? because it's interpreted as RIP relative addressing). 1507 c.EmitByte(encodeModRM(modNoDisplacement, regEncoding(r), useSBI)) 1508 c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) 1509 } else if lower8willSignExtendTo32(m.imm32) { 1510 c.EmitByte(encodeModRM(modShortDisplacement, regEncoding(r), useSBI)) 1511 c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) 1512 c.EmitByte(byte(m.imm32)) 1513 } else { 1514 c.EmitByte(encodeModRM(modLongDisplacement, regEncoding(r), useSBI)) 1515 c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) 1516 c.Emit4Bytes(m.imm32) 1517 } 1518 1519 case amodeRipRel: 1520 rex.encode(c, regRexBit(r), 0) 1521 for opcodeNum > 0 { 1522 opcodeNum-- 1523 c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) 1524 } 1525 1526 // Indicate "LEAQ [RIP + 32bit displacement]. 1527 // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing 1528 c.EmitByte(encodeModRM(0b00, regEncoding(r), 0b101)) 1529 1530 // This will be resolved later, so we just emit a placeholder. 1531 needsLabelResolution = true 1532 c.Emit4Bytes(0) 1533 1534 default: 1535 panic("BUG: invalid addressing mode") 1536 } 1537 return 1538 } 1539 1540 const ( 1541 rexEncodingDefault byte = 0x40 1542 rexEncodingW = rexEncodingDefault | 0x08 1543 ) 1544 1545 // rexInfo is a bit set to indicate: 1546 // 1547 // 0x01: W bit must be cleared. 1548 // 0x02: REX prefix must be emitted. 1549 type rexInfo byte 1550 1551 func (ri rexInfo) setW() rexInfo { 1552 return ri | 0x01 1553 } 1554 1555 func (ri rexInfo) clearW() rexInfo { 1556 return ri & 0x02 1557 } 1558 1559 func (ri rexInfo) always() rexInfo { 1560 return ri | 0x02 1561 } 1562 1563 func (ri rexInfo) notAlways() rexInfo { //nolint 1564 return ri & 0x01 1565 } 1566 1567 func (ri rexInfo) encode(c backend.Compiler, r uint8, b uint8) { 1568 var w byte = 0 1569 if ri&0x01 != 0 { 1570 w = 0x01 1571 } 1572 rex := rexEncodingDefault | w<<3 | r<<2 | b 1573 if rex != rexEncodingDefault || ri&0x02 != 0 { 1574 c.EmitByte(rex) 1575 } 1576 } 1577 1578 func (ri rexInfo) encodeForIndex(c backend.Compiler, encR regEnc, encIndex regEnc, encBase regEnc) { 1579 var w byte = 0 1580 if ri&0x01 != 0 { 1581 w = 0x01 1582 } 1583 r := encR.rexBit() 1584 x := encIndex.rexBit() 1585 b := encBase.rexBit() 1586 rex := byte(0x40) | w<<3 | r<<2 | x<<1 | b 1587 if rex != 0x40 || ri&0x02 != 0 { 1588 c.EmitByte(rex) 1589 } 1590 } 1591 1592 type regEnc byte 1593 1594 func (r regEnc) rexBit() byte { 1595 return regRexBit(byte(r)) 1596 } 1597 1598 func (r regEnc) encoding() byte { 1599 return regEncoding(byte(r)) 1600 } 1601 1602 func regRexBit(r byte) byte { 1603 return r >> 3 1604 } 1605 1606 func regEncoding(r byte) byte { 1607 return r & 0x07 1608 } 1609 1610 var regEncodings = [...]regEnc{ 1611 rax: 0b000, 1612 rcx: 0b001, 1613 rdx: 0b010, 1614 rbx: 0b011, 1615 rsp: 0b100, 1616 rbp: 0b101, 1617 rsi: 0b110, 1618 rdi: 0b111, 1619 r8: 0b1000, 1620 r9: 0b1001, 1621 r10: 0b1010, 1622 r11: 0b1011, 1623 r12: 0b1100, 1624 r13: 0b1101, 1625 r14: 0b1110, 1626 r15: 0b1111, 1627 xmm0: 0b000, 1628 xmm1: 0b001, 1629 xmm2: 0b010, 1630 xmm3: 0b011, 1631 xmm4: 0b100, 1632 xmm5: 0b101, 1633 xmm6: 0b110, 1634 xmm7: 0b111, 1635 xmm8: 0b1000, 1636 xmm9: 0b1001, 1637 xmm10: 0b1010, 1638 xmm11: 0b1011, 1639 xmm12: 0b1100, 1640 xmm13: 0b1101, 1641 xmm14: 0b1110, 1642 xmm15: 0b1111, 1643 } 1644 1645 type legacyPrefixes byte 1646 1647 const ( 1648 legacyPrefixesNone legacyPrefixes = iota 1649 legacyPrefixes0x66 1650 legacyPrefixes0xF0 1651 legacyPrefixes0x660xF0 1652 legacyPrefixes0xF2 1653 legacyPrefixes0xF3 1654 ) 1655 1656 func (p legacyPrefixes) encode(c backend.Compiler) { 1657 switch p { 1658 case legacyPrefixesNone: 1659 case legacyPrefixes0x66: 1660 c.EmitByte(0x66) 1661 case legacyPrefixes0xF0: 1662 c.EmitByte(0xf0) 1663 case legacyPrefixes0x660xF0: 1664 c.EmitByte(0x66) 1665 c.EmitByte(0xf0) 1666 case legacyPrefixes0xF2: 1667 c.EmitByte(0xf2) 1668 case legacyPrefixes0xF3: 1669 c.EmitByte(0xf3) 1670 default: 1671 panic("BUG: invalid legacy prefix") 1672 } 1673 } 1674 1675 func lower32willSignExtendTo64(x uint64) bool { 1676 xs := int64(x) 1677 return xs == int64(uint64(int32(xs))) 1678 } 1679 1680 func lower8willSignExtendTo32(x uint32) bool { 1681 xs := int32(x) 1682 return xs == ((xs << 24) >> 24) 1683 }