github.com/chenzhuoyu/iasm@v0.9.1/x86_64/program.go (about) 1 package x86_64 2 3 import ( 4 "fmt" 5 "math" 6 "math/bits" 7 8 "github.com/chenzhuoyu/iasm/expr" 9 ) 10 11 type ( 12 _PseudoType int 13 _InstructionEncoder func(*Program, ...interface{}) *Instruction 14 ) 15 16 const ( 17 _PseudoNop _PseudoType = iota + 1 18 _PseudoByte 19 _PseudoWord 20 _PseudoLong 21 _PseudoQuad 22 _PseudoData 23 _PseudoAlign 24 ) 25 26 func (self _PseudoType) String() string { 27 switch self { 28 case _PseudoNop: 29 return ".nop" 30 case _PseudoByte: 31 return ".byte" 32 case _PseudoWord: 33 return ".word" 34 case _PseudoLong: 35 return ".long" 36 case _PseudoQuad: 37 return ".quad" 38 case _PseudoData: 39 return ".data" 40 case _PseudoAlign: 41 return ".align" 42 default: 43 panic("unreachable") 44 } 45 } 46 47 type _Pseudo struct { 48 kind _PseudoType 49 data []byte 50 uint uint64 51 expr *expr.Expr 52 } 53 54 func (self *_Pseudo) free() { 55 if self.expr != nil { 56 self.expr.Free() 57 } 58 } 59 60 func (self *_Pseudo) encode(m *[]byte, pc uintptr) int { 61 switch self.kind { 62 case _PseudoNop: 63 return 0 64 case _PseudoByte: 65 self.encodeByte(m) 66 return 1 67 case _PseudoWord: 68 self.encodeWord(m) 69 return 2 70 case _PseudoLong: 71 self.encodeLong(m) 72 return 4 73 case _PseudoQuad: 74 self.encodeQuad(m) 75 return 8 76 case _PseudoData: 77 self.encodeData(m) 78 return len(self.data) 79 case _PseudoAlign: 80 self.encodeAlign(m, pc) 81 return self.alignSize(pc) 82 default: 83 panic("invalid pseudo instruction") 84 } 85 } 86 87 func (self *_Pseudo) evalExpr(low int64, high int64) int64 { 88 if v, err := self.expr.Evaluate(); err != nil { 89 panic(err) 90 } else if v < low || v > high { 91 panic(fmt.Sprintf("expression out of range [%d, %d]: %d", low, high, v)) 92 } else { 93 return v 94 } 95 } 96 97 func (self *_Pseudo) alignSize(pc uintptr) int { 98 if !ispow2(self.uint) { 99 panic(fmt.Sprintf("aligment should be a power of 2, not %d", self.uint)) 100 } else { 101 return 
align(int(pc), bits.TrailingZeros64(self.uint)) - int(pc) 102 } 103 } 104 105 func (self *_Pseudo) encodeData(m *[]byte) { 106 if m != nil { 107 *m = append(*m, self.data...) 108 } 109 } 110 111 func (self *_Pseudo) encodeByte(m *[]byte) { 112 if m != nil { 113 append8(m, byte(self.evalExpr(math.MinInt8, math.MaxUint8))) 114 } 115 } 116 117 func (self *_Pseudo) encodeWord(m *[]byte) { 118 if m != nil { 119 append16(m, uint16(self.evalExpr(math.MinInt16, math.MaxUint16))) 120 } 121 } 122 123 func (self *_Pseudo) encodeLong(m *[]byte) { 124 if m != nil { 125 append32(m, uint32(self.evalExpr(math.MinInt32, math.MaxUint32))) 126 } 127 } 128 129 func (self *_Pseudo) encodeQuad(m *[]byte) { 130 if m != nil { 131 if v, err := self.expr.Evaluate(); err != nil { 132 panic(err) 133 } else { 134 append64(m, uint64(v)) 135 } 136 } 137 } 138 139 func (self *_Pseudo) encodeAlign(m *[]byte, pc uintptr) { 140 if m != nil { 141 if self.expr == nil { 142 expandmm(m, self.alignSize(pc), 0) 143 } else { 144 expandmm(m, self.alignSize(pc), byte(self.evalExpr(math.MinInt8, math.MaxUint8))) 145 } 146 } 147 } 148 149 // Operands represents a sequence of operand required by an instruction. 150 type Operands [_N_args]interface{} 151 152 // InstructionDomain represents the domain of an instruction. 153 type InstructionDomain uint8 154 155 const ( 156 DomainGeneric InstructionDomain = iota 157 DomainMMXSSE 158 DomainAVX 159 DomainFMA 160 DomainCrypto 161 DomainMask 162 DomainAMDSpecific 163 DomainMisc 164 DomainPseudo 165 ) 166 167 type ( 168 _BranchType uint8 169 ) 170 171 const ( 172 _B_none _BranchType = iota 173 _B_conditional 174 _B_unconditional 175 ) 176 177 // Instruction represents an unencoded instruction. 
178 type Instruction struct { 179 next *Instruction 180 pc uintptr 181 nb int 182 len int 183 argc int 184 name string 185 argv Operands 186 forms [_N_forms]_Encoding 187 pseudo _Pseudo 188 branch _BranchType 189 domain InstructionDomain 190 prefix []byte 191 } 192 193 func (self *Instruction) add(flags int, encoder func(m *_Encoding, v []interface{})) { 194 self.forms[self.len].flags = flags 195 self.forms[self.len].encoder = encoder 196 self.len++ 197 } 198 199 func (self *Instruction) free() { 200 self.clear() 201 self.pseudo.free() 202 //freeInstruction(self) 203 } 204 205 func (self *Instruction) clear() { 206 for i := 0; i < self.argc; i++ { 207 if v, ok := self.argv[i].(Disposable); ok { 208 v.Free() 209 } 210 } 211 } 212 213 func (self *Instruction) check(e *_Encoding) bool { 214 if (e.flags & _F_rel1) != 0 { 215 return isRel8(self.argv[0]) 216 } else if (e.flags & _F_rel4) != 0 { 217 return isRel32(self.argv[0]) || isLabel(self.argv[0]) 218 } else { 219 return true 220 } 221 } 222 223 func (self *Instruction) encode(m *[]byte) int { 224 n := math.MaxInt64 225 p := (*_Encoding)(nil) 226 227 /* encode prefixes if any */ 228 if self.nb = len(self.prefix); m != nil { 229 *m = append(*m, self.prefix...) 230 } 231 232 /* check for pseudo-instructions */ 233 if self.pseudo.kind != 0 { 234 self.nb += self.pseudo.encode(m, self.pc) 235 return self.nb 236 } 237 238 /* find the shortest encoding */ 239 for i := 0; i < self.len; i++ { 240 if e := &self.forms[i]; self.check(e) { 241 if v := e.encode(self.argv[:self.argc]); v < n { 242 n = v 243 p = e 244 } 245 } 246 } 247 248 /* add to buffer if needed */ 249 if m != nil { 250 *m = append(*m, p.bytes[:n]...) 
251 } 252 253 /* update the instruction length */ 254 self.nb += n 255 return self.nb 256 } 257 258 /** Instruction Prefixes **/ 259 260 const ( 261 _P_cs = 0x2e 262 _P_ds = 0x3e 263 _P_es = 0x26 264 _P_fs = 0x64 265 _P_gs = 0x65 266 _P_ss = 0x36 267 _P_lock = 0xf0 268 ) 269 270 // CS overrides the memory operation of this instruction to CS. 271 func (self *Instruction) CS() *Instruction { 272 self.prefix = append(self.prefix, _P_cs) 273 return self 274 } 275 276 // DS overrides the memory operation of this instruction to DS, 277 // this is the default section for most instructions if not specified. 278 func (self *Instruction) DS() *Instruction { 279 self.prefix = append(self.prefix, _P_ds) 280 return self 281 } 282 283 // ES overrides the memory operation of this instruction to ES. 284 func (self *Instruction) ES() *Instruction { 285 self.prefix = append(self.prefix, _P_es) 286 return self 287 } 288 289 // FS overrides the memory operation of this instruction to FS. 290 func (self *Instruction) FS() *Instruction { 291 self.prefix = append(self.prefix, _P_fs) 292 return self 293 } 294 295 // GS overrides the memory operation of this instruction to GS. 296 func (self *Instruction) GS() *Instruction { 297 self.prefix = append(self.prefix, _P_gs) 298 return self 299 } 300 301 // SS overrides the memory operation of this instruction to SS. 302 func (self *Instruction) SS() *Instruction { 303 self.prefix = append(self.prefix, _P_ss) 304 return self 305 } 306 307 // LOCK causes the processor's LOCK# signal to be asserted during execution of 308 // the accompanying instruction (turns the instruction into an atomic instruction). 309 // In a multiprocessor environment, the LOCK# signal insures that the processor 310 // has exclusive use of any shared memory while the signal is asserted. 
311 func (self *Instruction) LOCK() *Instruction { 312 self.prefix = append(self.prefix, _P_lock) 313 return self 314 } 315 316 /** Basic Instruction Properties **/ 317 318 // Name returns the instruction name. 319 func (self *Instruction) Name() string { 320 return self.name 321 } 322 323 // Domain returns the domain of this instruction. 324 func (self *Instruction) Domain() InstructionDomain { 325 return self.domain 326 } 327 328 // Operands returns the operands of this instruction. 329 func (self *Instruction) Operands() []interface{} { 330 return self.argv[:self.argc] 331 } 332 333 // Program represents a sequence of instructions. 334 type Program struct { 335 arch *Arch 336 head *Instruction 337 tail *Instruction 338 } 339 340 const ( 341 _N_near = 2 // near-branch (-128 ~ +127) takes 2 bytes to encode 342 _N_far_cond = 6 // conditional far-branch takes 6 bytes to encode 343 _N_far_uncond = 5 // unconditional far-branch takes 5 bytes to encode 344 ) 345 346 func (self *Program) clear() { 347 for p, q := self.head, self.head; p != nil; p = q { 348 q = p.next 349 p.free() 350 } 351 } 352 353 func (self *Program) alloc(name string, argc int, argv Operands) *Instruction { 354 p := self.tail 355 q := newInstruction(name, argc, argv) 356 357 /* attach to tail if any */ 358 if p != nil { 359 p.next = q 360 } else { 361 self.head = q 362 } 363 364 /* set the new tail */ 365 self.tail = q 366 return q 367 } 368 369 func (self *Program) pseudo(kind _PseudoType) (p *Instruction) { 370 p = self.alloc(kind.String(), 0, Operands{}) 371 p.domain = DomainPseudo 372 p.pseudo.kind = kind 373 return 374 } 375 376 func (self *Program) require(isa ISA) { 377 if !self.arch.HasISA(isa) { 378 panic("ISA '" + isa.String() + "' was not enabled") 379 } 380 } 381 382 func (self *Program) branchSize(p *Instruction) int { 383 switch p.branch { 384 case _B_none: 385 panic("p is not a branch") 386 case _B_conditional: 387 return _N_far_cond 388 case _B_unconditional: 389 return _N_far_uncond 
390 default: 391 panic("invalid instruction") 392 } 393 } 394 395 /** Pseudo-Instructions **/ 396 397 // Byte is a pseudo-instruction to add raw byte to the assembled code. 398 func (self *Program) Byte(v *expr.Expr) (p *Instruction) { 399 p = self.pseudo(_PseudoByte) 400 p.pseudo.expr = v 401 return 402 } 403 404 // Word is a pseudo-instruction to add raw uint16 as little-endian to the assembled code. 405 func (self *Program) Word(v *expr.Expr) (p *Instruction) { 406 p = self.pseudo(_PseudoWord) 407 p.pseudo.expr = v 408 return 409 } 410 411 // Long is a pseudo-instruction to add raw uint32 as little-endian to the assembled code. 412 func (self *Program) Long(v *expr.Expr) (p *Instruction) { 413 p = self.pseudo(_PseudoLong) 414 p.pseudo.expr = v 415 return 416 } 417 418 // Quad is a pseudo-instruction to add raw uint64 as little-endian to the assembled code. 419 func (self *Program) Quad(v *expr.Expr) (p *Instruction) { 420 p = self.pseudo(_PseudoQuad) 421 p.pseudo.expr = v 422 return 423 } 424 425 // Data is a pseudo-instruction to add raw bytes to the assembled code. 426 func (self *Program) Data(v []byte) (p *Instruction) { 427 p = self.pseudo(_PseudoData) 428 p.pseudo.data = v 429 return 430 } 431 432 // Align is a pseudo-instruction to ensure the PC is aligned to a certain value. 433 func (self *Program) Align(align uint64, padding *expr.Expr) (p *Instruction) { 434 p = self.pseudo(_PseudoAlign) 435 p.pseudo.uint = align 436 p.pseudo.expr = padding 437 return 438 } 439 440 /** Program Assembler **/ 441 442 // Free returns the Program object into pool. 443 // Any operation performed after Free is undefined behavior. 444 // 445 // NOTE: This also frees all the instructions, labels, memory 446 // 447 // operands and expressions associated with this program. 448 func (self *Program) Free() { 449 self.clear() 450 //freeProgram(self) 451 } 452 453 // Link pins a label at the current position. 
// It does so by appending a no-op pseudo-instruction to the program and
// recording it as the label's destination. It panics if the label already
// has a destination.
func (self *Program) Link(p *Label) {
	if p.Dest != nil {
		panic("lable was alreay linked")
	} else {
		p.Dest = self.pseudo(_PseudoNop)
	}
}

// Assemble assembles and links the entire program into machine code.
//
// pc is the program counter assigned to the first instruction; every
// following instruction is placed directly after its predecessor. The
// returned slice holds the encoded bytes of all instructions in order.
//
// NOTE: linking rewrites the program in place: label operands are replaced
// by their resolved offsets and label-referencing memory operands are
// converted into plain offsets.
func (self *Program) Assemble(pc uintptr) (ret []byte) {
	orig := pc
	next := true
	offs := uintptr(0)

	/* Pass 0: PC-precompute, assume all labeled branches are far-branches. */
	for p := self.head; p != nil; p = p.next {
		if p.pc = pc; !isLabel(p.argv[0]) || p.branch == _B_none {
			pc += uintptr(p.encode(nil))
		} else {
			pc += uintptr(self.branchSize(p))
		}
	}

	/* allocate space for the machine code, the size computed
	 * by Pass 0 is an upper bound since branches can only shrink */
	nb := int(pc - orig)
	ret = make([]byte, 0, nb)

	/* Pass 1: adjust all the jumps, repeat until no branch was shrunk */
	for next {
		next = false
		offs = uintptr(0)

		/* scan all the branches, "offs" accumulates the number of
		 * bytes saved so far by instructions earlier in this scan */
		for p := self.head; p != nil; p = p.next {
			var ok bool
			var lb *Label

			/* re-calculate the alignment here, the padding size
			 * depends on the PC, so it may change once the PC moved */
			if nb = p.nb; p.pseudo.kind == _PseudoAlign {
				p.pc -= offs
				offs += uintptr(nb - p.encode(nil))
				continue
			}

			/* adjust the program counter */
			p.pc -= offs
			lb, ok = p.argv[0].(*Label)

			/* only care about labeled far-branches */
			if !ok || p.nb == _N_near || p.branch == _B_none {
				continue
			}

			/* calculate the jump offset */
			size := self.branchSize(p)
			diff := lb.offset(p.pc, size)

			/* too far to be a near jump */
			if diff > 127 || diff < -128 {
				p.nb = size
				continue
			}

			/* a far jump becomes a near jump, calculate
			 * the PC adjustment value and assemble again */
			next = true
			p.nb = _N_near
			offs += uintptr(size - _N_near)
		}
	}

	/* Pass 2: link all the cross-references */
	for p := self.head; p != nil; p = p.next {
		for i := 0; i < p.argc; i++ {
			var ok bool
			var lb *Label
			var op *MemoryOperand

			/* resolve labels */
			if lb, ok = p.argv[i].(*Label); ok {
				p.argv[i] = lb.offset(p.pc, p.nb)
				continue
			}

			/* check for memory operands */
			if op, ok = p.argv[i].(*MemoryOperand); !ok {
				continue
			}

			/* check for label references */
			if op.Addr.Type != Reference {
				continue
			}

			/* replace the label with the real offset */
			op.Addr.Type = Offset
			op.Addr.Offset = op.Addr.Reference.offset(p.pc, p.nb)
		}
	}

	/* Pass 3: actually encode all the instructions */
	for p := self.head; p != nil; p = p.next {
		p.encode(&ret)
	}

	/* all done */
	return ret
}

// AssembleAndFree is like Assemble, but it frees the Program after assembling.
func (self *Program) AssembleAndFree(pc uintptr) (ret []byte) {
	ret = self.Assemble(pc)
	self.Free()
	return
}