github.com/cloudwego/iasm@v0.2.0/x86_64/program.go (about) 1 // 2 // Copyright 2024 CloudWeGo Authors 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 package x86_64 18 19 import ( 20 "fmt" 21 "math" 22 "math/bits" 23 24 "github.com/cloudwego/iasm/expr" 25 ) 26 27 type ( 28 _PseudoType int 29 _InstructionEncoder func(*Program, ...interface{}) *Instruction 30 ) 31 32 const ( 33 _PseudoNop _PseudoType = iota + 1 34 _PseudoByte 35 _PseudoWord 36 _PseudoLong 37 _PseudoQuad 38 _PseudoData 39 _PseudoAlign 40 ) 41 42 func (self _PseudoType) String() string { 43 switch self { 44 case _PseudoNop: 45 return ".nop" 46 case _PseudoByte: 47 return ".byte" 48 case _PseudoWord: 49 return ".word" 50 case _PseudoLong: 51 return ".long" 52 case _PseudoQuad: 53 return ".quad" 54 case _PseudoData: 55 return ".data" 56 case _PseudoAlign: 57 return ".align" 58 default: 59 panic("unreachable") 60 } 61 } 62 63 type _Pseudo struct { 64 kind _PseudoType 65 data []byte 66 uint uint64 67 expr *expr.Expr 68 } 69 70 func (self *_Pseudo) free() { 71 if self.expr != nil { 72 self.expr.Free() 73 } 74 } 75 76 func (self *_Pseudo) encode(m *[]byte, pc uintptr) int { 77 switch self.kind { 78 case _PseudoNop: 79 return 0 80 case _PseudoByte: 81 self.encodeByte(m) 82 return 1 83 case _PseudoWord: 84 self.encodeWord(m) 85 return 2 86 case _PseudoLong: 87 self.encodeLong(m) 88 return 4 89 case _PseudoQuad: 90 self.encodeQuad(m) 91 return 8 92 case _PseudoData: 93 
self.encodeData(m) 94 return len(self.data) 95 case _PseudoAlign: 96 self.encodeAlign(m, pc) 97 return self.alignSize(pc) 98 default: 99 panic("invalid pseudo instruction") 100 } 101 } 102 103 func (self *_Pseudo) evalExpr(low int64, high int64) int64 { 104 if v, err := self.expr.Evaluate(); err != nil { 105 panic(err) 106 } else if v < low || v > high { 107 panic(fmt.Sprintf("expression out of range [%d, %d]: %d", low, high, v)) 108 } else { 109 return v 110 } 111 } 112 113 func (self *_Pseudo) alignSize(pc uintptr) int { 114 if !ispow2(self.uint) { 115 panic(fmt.Sprintf("aligment should be a power of 2, not %d", self.uint)) 116 } else { 117 return align(int(pc), bits.TrailingZeros64(self.uint)) - int(pc) 118 } 119 } 120 121 func (self *_Pseudo) encodeData(m *[]byte) { 122 if m != nil { 123 *m = append(*m, self.data...) 124 } 125 } 126 127 func (self *_Pseudo) encodeByte(m *[]byte) { 128 if m != nil { 129 append8(m, byte(self.evalExpr(math.MinInt8, math.MaxUint8))) 130 } 131 } 132 133 func (self *_Pseudo) encodeWord(m *[]byte) { 134 if m != nil { 135 append16(m, uint16(self.evalExpr(math.MinInt16, math.MaxUint16))) 136 } 137 } 138 139 func (self *_Pseudo) encodeLong(m *[]byte) { 140 if m != nil { 141 append32(m, uint32(self.evalExpr(math.MinInt32, math.MaxUint32))) 142 } 143 } 144 145 func (self *_Pseudo) encodeQuad(m *[]byte) { 146 if m != nil { 147 if v, err := self.expr.Evaluate(); err != nil { 148 panic(err) 149 } else { 150 append64(m, uint64(v)) 151 } 152 } 153 } 154 155 func (self *_Pseudo) encodeAlign(m *[]byte, pc uintptr) { 156 if m != nil { 157 if self.expr == nil { 158 expandmm(m, self.alignSize(pc), 0) 159 } else { 160 expandmm(m, self.alignSize(pc), byte(self.evalExpr(math.MinInt8, math.MaxUint8))) 161 } 162 } 163 } 164 165 // Operands represents a sequence of operand required by an instruction. 166 type Operands [_N_args]interface{} 167 168 // InstructionDomain represents the domain of an instruction. 
169 type InstructionDomain uint8 170 171 const ( 172 DomainGeneric InstructionDomain = iota 173 DomainMMXSSE 174 DomainAVX 175 DomainFMA 176 DomainCrypto 177 DomainMask 178 DomainAMDSpecific 179 DomainMisc 180 DomainPseudo 181 ) 182 183 type ( 184 _BranchType uint8 185 ) 186 187 const ( 188 _B_none _BranchType = iota 189 _B_conditional 190 _B_unconditional 191 ) 192 193 // Instruction represents an unencoded instruction. 194 type Instruction struct { 195 next *Instruction 196 pc uintptr 197 nb int 198 len int 199 argc int 200 name string 201 argv Operands 202 forms [_N_forms]_Encoding 203 pseudo _Pseudo 204 branch _BranchType 205 domain InstructionDomain 206 prefix []byte 207 } 208 209 func (self *Instruction) add(flags int, encoder func(m *_Encoding, v []interface{})) { 210 self.forms[self.len].flags = flags 211 self.forms[self.len].encoder = encoder 212 self.len++ 213 } 214 215 func (self *Instruction) free() { 216 self.clear() 217 self.pseudo.free() 218 //freeInstruction(self) 219 } 220 221 func (self *Instruction) clear() { 222 for i := 0; i < self.argc; i++ { 223 if v, ok := self.argv[i].(Disposable); ok { 224 v.Free() 225 } 226 } 227 } 228 229 func (self *Instruction) check(e *_Encoding) bool { 230 if (e.flags & _F_rel1) != 0 { 231 return isRel8(self.argv[0]) 232 } else if (e.flags & _F_rel4) != 0 { 233 return isRel32(self.argv[0]) || isLabel(self.argv[0]) 234 } else { 235 return true 236 } 237 } 238 239 func (self *Instruction) encode(m *[]byte) int { 240 n := math.MaxInt64 241 p := (*_Encoding)(nil) 242 243 /* encode prefixes if any */ 244 if self.nb = len(self.prefix); m != nil { 245 *m = append(*m, self.prefix...) 
246 } 247 248 /* check for pseudo-instructions */ 249 if self.pseudo.kind != 0 { 250 self.nb += self.pseudo.encode(m, self.pc) 251 return self.nb 252 } 253 254 /* find the shortest encoding */ 255 for i := 0; i < self.len; i++ { 256 if e := &self.forms[i]; self.check(e) { 257 if v := e.encode(self.argv[:self.argc]); v < n { 258 n = v 259 p = e 260 } 261 } 262 } 263 264 /* add to buffer if needed */ 265 if m != nil { 266 *m = append(*m, p.bytes[:n]...) 267 } 268 269 /* update the instruction length */ 270 self.nb += n 271 return self.nb 272 } 273 274 /** Instruction Prefixes **/ 275 276 const ( 277 _P_cs = 0x2e 278 _P_ds = 0x3e 279 _P_es = 0x26 280 _P_fs = 0x64 281 _P_gs = 0x65 282 _P_ss = 0x36 283 _P_lock = 0xf0 284 ) 285 286 // CS overrides the memory operation of this instruction to CS. 287 func (self *Instruction) CS() *Instruction { 288 self.prefix = append(self.prefix, _P_cs) 289 return self 290 } 291 292 // DS overrides the memory operation of this instruction to DS, 293 // this is the default section for most instructions if not specified. 294 func (self *Instruction) DS() *Instruction { 295 self.prefix = append(self.prefix, _P_ds) 296 return self 297 } 298 299 // ES overrides the memory operation of this instruction to ES. 300 func (self *Instruction) ES() *Instruction { 301 self.prefix = append(self.prefix, _P_es) 302 return self 303 } 304 305 // FS overrides the memory operation of this instruction to FS. 306 func (self *Instruction) FS() *Instruction { 307 self.prefix = append(self.prefix, _P_fs) 308 return self 309 } 310 311 // GS overrides the memory operation of this instruction to GS. 312 func (self *Instruction) GS() *Instruction { 313 self.prefix = append(self.prefix, _P_gs) 314 return self 315 } 316 317 // SS overrides the memory operation of this instruction to SS. 
318 func (self *Instruction) SS() *Instruction { 319 self.prefix = append(self.prefix, _P_ss) 320 return self 321 } 322 323 // LOCK causes the processor's LOCK# signal to be asserted during execution of 324 // the accompanying instruction (turns the instruction into an atomic instruction). 325 // In a multiprocessor environment, the LOCK# signal insures that the processor 326 // has exclusive use of any shared memory while the signal is asserted. 327 func (self *Instruction) LOCK() *Instruction { 328 self.prefix = append(self.prefix, _P_lock) 329 return self 330 } 331 332 /** Basic Instruction Properties **/ 333 334 // Name returns the instruction name. 335 func (self *Instruction) Name() string { 336 return self.name 337 } 338 339 // Domain returns the domain of this instruction. 340 func (self *Instruction) Domain() InstructionDomain { 341 return self.domain 342 } 343 344 // Operands returns the operands of this instruction. 345 func (self *Instruction) Operands() []interface{} { 346 return self.argv[:self.argc] 347 } 348 349 // Program represents a sequence of instructions. 
350 type Program struct { 351 arch *Arch 352 head *Instruction 353 tail *Instruction 354 } 355 356 const ( 357 _N_near = 2 // near-branch (-128 ~ +127) takes 2 bytes to encode 358 _N_far_cond = 6 // conditional far-branch takes 6 bytes to encode 359 _N_far_uncond = 5 // unconditional far-branch takes 5 bytes to encode 360 ) 361 362 func (self *Program) clear() { 363 for p, q := self.head, self.head; p != nil; p = q { 364 q = p.next 365 p.free() 366 } 367 } 368 369 func (self *Program) alloc(name string, argc int, argv Operands) *Instruction { 370 p := self.tail 371 q := newInstruction(name, argc, argv) 372 373 /* attach to tail if any */ 374 if p != nil { 375 p.next = q 376 } else { 377 self.head = q 378 } 379 380 /* set the new tail */ 381 self.tail = q 382 return q 383 } 384 385 func (self *Program) pseudo(kind _PseudoType) (p *Instruction) { 386 p = self.alloc(kind.String(), 0, Operands{}) 387 p.domain = DomainPseudo 388 p.pseudo.kind = kind 389 return 390 } 391 392 func (self *Program) require(isa ISA) { 393 if !self.arch.HasISA(isa) { 394 panic("ISA '" + isa.String() + "' was not enabled") 395 } 396 } 397 398 func (self *Program) branchSize(p *Instruction) int { 399 switch p.branch { 400 case _B_none: 401 panic("p is not a branch") 402 case _B_conditional: 403 return _N_far_cond 404 case _B_unconditional: 405 return _N_far_uncond 406 default: 407 panic("invalid instruction") 408 } 409 } 410 411 /** Pseudo-Instructions **/ 412 413 // Byte is a pseudo-instruction to add raw byte to the assembled code. 414 func (self *Program) Byte(v *expr.Expr) (p *Instruction) { 415 p = self.pseudo(_PseudoByte) 416 p.pseudo.expr = v 417 return 418 } 419 420 // Word is a pseudo-instruction to add raw uint16 as little-endian to the assembled code. 421 func (self *Program) Word(v *expr.Expr) (p *Instruction) { 422 p = self.pseudo(_PseudoWord) 423 p.pseudo.expr = v 424 return 425 } 426 427 // Long is a pseudo-instruction to add raw uint32 as little-endian to the assembled code. 
func (self *Program) Long(v *expr.Expr) (p *Instruction) {
	p = self.pseudo(_PseudoLong)
	p.pseudo.expr = v
	return
}

// Quad is a pseudo-instruction to add raw uint64 as little-endian to the assembled code.
// The expression v is evaluated when the program is encoded, not here.
func (self *Program) Quad(v *expr.Expr) (p *Instruction) {
	p = self.pseudo(_PseudoQuad)
	p.pseudo.expr = v
	return
}

// Data is a pseudo-instruction to add raw bytes to the assembled code.
// The slice v is stored as is, without being copied.
func (self *Program) Data(v []byte) (p *Instruction) {
	p = self.pseudo(_PseudoData)
	p.pseudo.data = v
	return
}

// Align is a pseudo-instruction to ensure the PC is aligned to a certain value.
// align must be a power of 2 (this is checked when the size of the padding is
// computed, not here). padding is the expression of the fill byte; when it is
// nil the padding consists of zero bytes.
func (self *Program) Align(align uint64, padding *expr.Expr) (p *Instruction) {
	p = self.pseudo(_PseudoAlign)
	p.pseudo.uint = align
	p.pseudo.expr = padding
	return
}

/** Program Assembler **/

// Free releases the resources held by this Program.
// Any operation performed after Free is undefined behavior.
//
// NOTE: This frees every instruction of the program, which in turn frees
// the operands that implement Disposable and the expressions of the
// pseudo-instructions. The call that would hand the Program itself back to
// a pool is currently commented out.
func (self *Program) Free() {
	self.clear()
	//freeProgram(self)
}

// Link pins a label at the current position.
// It appends a ".nop" pseudo-instruction, which occupies no bytes, and records
// it as the destination of the label. It panics when the label already has a
// destination.
func (self *Program) Link(p *Label) {
	if p.Dest != nil {
		panic("lable was alreay linked")
	} else {
		p.Dest = self.pseudo(_PseudoNop)
	}
}

// Assemble assembles and links the entire program into machine code.
//
// pc is the address assigned to the first instruction; every following
// instruction is placed right after its predecessor. The returned slice holds
// the encoded bytes of all instructions in program order.
//
// NOTE: Assemble rewrites the program in place: the pc and nb fields of the
// instructions are updated, label operands are replaced by their computed
// offsets, and memory operands that reference a label are turned into offset
// operands.
func (self *Program) Assemble(pc uintptr) (ret []byte) {
	orig := pc
	next := true
	offs := uintptr(0)

	/* Pass 0: PC-precompute, assume all labeled branches are far-branches. */
	for p := self.head; p != nil; p = p.next {
		/* anything but a labeled branch can be sized by a dry-run encoding
		 * (nil buffer); a labeled branch is charged its far size instead */
		if p.pc = pc; !isLabel(p.argv[0]) || p.branch == _B_none {
			pc += uintptr(p.encode(nil))
		} else {
			pc += uintptr(self.branchSize(p))
		}
	}

	/* allocate space for the machine code; the capacity is the size computed
	 * before any branch is shortened */
	nb := int(pc - orig)
	ret = make([]byte, 0, nb)

	/* Pass 1: adjust all the jumps, repeat until a full scan
	 * shortens no more branches */
	for next {
		next = false
		/* offs accumulates the bytes saved so far in this scan, every
		 * later instruction moves down by that amount */
		offs = uintptr(0)

		/* scan all the branches */
		for p := self.head; p != nil; p = p.next {
			var ok bool
			var lb *Label

			/* re-calculate the alignment here: the padding depends on the
			 * new PC, so the difference between the old size (nb) and the
			 * new size is added to the running adjustment; the subtraction
			 * may be negative, in which case the unsigned arithmetic wraps
			 * around and the later "p.pc -= offs" still moves the PC in
			 * the right direction */
			if nb = p.nb; p.pseudo.kind == _PseudoAlign {
				p.pc -= offs
				offs += uintptr(nb - p.encode(nil))
				continue
			}

			/* adjust the program counter */
			p.pc -= offs
			lb, ok = p.argv[0].(*Label)

			/* only care about labeled far-branches, a branch that was
			 * already shortened is never looked at again */
			if !ok || p.nb == _N_near || p.branch == _B_none {
				continue
			}

			/* calculate the jump offset, as if the branch
			 * was encoded in its far form */
			size := self.branchSize(p)
			diff := lb.offset(p.pc, size)

			/* too far to be a near jump */
			if diff > 127 || diff < -128 {
				p.nb = size
				continue
			}

			/* a far jump becomes a near jump, calculate
			 * the PC adjustment value and assemble again */
			next = true
			p.nb = _N_near
			offs += uintptr(size - _N_near)
		}
	}

	/* Pass 2: link all the cross-references */
	for p := self.head; p != nil; p = p.next {
		for i := 0; i < p.argc; i++ {
			var ok bool
			var lb *Label
			var op *MemoryOperand

			/* resolve labels, the operand is replaced by the
			 * offset computed from the final PC and size */
			if lb, ok = p.argv[i].(*Label); ok {
				p.argv[i] = lb.offset(p.pc, p.nb)
				continue
			}

			/* check for memory operands */
			if op, ok = p.argv[i].(*MemoryOperand); !ok {
				continue
			}

			/* check for label references */
			if op.Addr.Type != Reference {
				continue
			}

			/* replace the label with the real offset */
			op.Addr.Type = Offset
			op.Addr.Offset = op.Addr.Reference.offset(p.pc, p.nb)
		}
	}

	/* Pass 3: actually encode all the instructions */
	for p := self.head; p != nil; p = p.next {
		p.encode(&ret)
	}

	/* all done */
	return ret
}

// AssembleAndFree is like Assemble, but it frees the Program after assembling.
// The Program must not be used once this method has returned.
func (self *Program) AssembleAndFree(pc uintptr) (ret []byte) {
	ret = self.Assemble(pc)
	self.Free()
	return
}