github.com/cloudwego/iasm@v0.2.0/x86_64/program.go (about)

     1  //
     2  // Copyright 2024 CloudWeGo Authors
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  //
    16  
    17  package x86_64
    18  
    19  import (
    20  	"fmt"
    21  	"math"
    22  	"math/bits"
    23  
    24  	"github.com/cloudwego/iasm/expr"
    25  )
    26  
    27  type (
    28  	_PseudoType         int
    29  	_InstructionEncoder func(*Program, ...interface{}) *Instruction
    30  )
    31  
    32  const (
    33  	_PseudoNop _PseudoType = iota + 1
    34  	_PseudoByte
    35  	_PseudoWord
    36  	_PseudoLong
    37  	_PseudoQuad
    38  	_PseudoData
    39  	_PseudoAlign
    40  )
    41  
    42  func (self _PseudoType) String() string {
    43  	switch self {
    44  	case _PseudoNop:
    45  		return ".nop"
    46  	case _PseudoByte:
    47  		return ".byte"
    48  	case _PseudoWord:
    49  		return ".word"
    50  	case _PseudoLong:
    51  		return ".long"
    52  	case _PseudoQuad:
    53  		return ".quad"
    54  	case _PseudoData:
    55  		return ".data"
    56  	case _PseudoAlign:
    57  		return ".align"
    58  	default:
    59  		panic("unreachable")
    60  	}
    61  }
    62  
// _Pseudo holds the payload of a pseudo-instruction. Which fields are
// meaningful depends on kind.
type _Pseudo struct {
	kind _PseudoType // which pseudo-instruction this is; 0 means "not a pseudo-instruction"
	data []byte      // raw bytes emitted by ".data"
	uint uint64      // alignment requested by ".align", must be a power of 2
	expr *expr.Expr  // value for ".byte" / ".word" / ".long" / ".quad", or the optional padding byte of ".align"
}
    69  
    70  func (self *_Pseudo) free() {
    71  	if self.expr != nil {
    72  		self.expr.Free()
    73  	}
    74  }
    75  
    76  func (self *_Pseudo) encode(m *[]byte, pc uintptr) int {
    77  	switch self.kind {
    78  	case _PseudoNop:
    79  		return 0
    80  	case _PseudoByte:
    81  		self.encodeByte(m)
    82  		return 1
    83  	case _PseudoWord:
    84  		self.encodeWord(m)
    85  		return 2
    86  	case _PseudoLong:
    87  		self.encodeLong(m)
    88  		return 4
    89  	case _PseudoQuad:
    90  		self.encodeQuad(m)
    91  		return 8
    92  	case _PseudoData:
    93  		self.encodeData(m)
    94  		return len(self.data)
    95  	case _PseudoAlign:
    96  		self.encodeAlign(m, pc)
    97  		return self.alignSize(pc)
    98  	default:
    99  		panic("invalid pseudo instruction")
   100  	}
   101  }
   102  
   103  func (self *_Pseudo) evalExpr(low int64, high int64) int64 {
   104  	if v, err := self.expr.Evaluate(); err != nil {
   105  		panic(err)
   106  	} else if v < low || v > high {
   107  		panic(fmt.Sprintf("expression out of range [%d, %d]: %d", low, high, v))
   108  	} else {
   109  		return v
   110  	}
   111  }
   112  
   113  func (self *_Pseudo) alignSize(pc uintptr) int {
   114  	if !ispow2(self.uint) {
   115  		panic(fmt.Sprintf("aligment should be a power of 2, not %d", self.uint))
   116  	} else {
   117  		return align(int(pc), bits.TrailingZeros64(self.uint)) - int(pc)
   118  	}
   119  }
   120  
   121  func (self *_Pseudo) encodeData(m *[]byte) {
   122  	if m != nil {
   123  		*m = append(*m, self.data...)
   124  	}
   125  }
   126  
   127  func (self *_Pseudo) encodeByte(m *[]byte) {
   128  	if m != nil {
   129  		append8(m, byte(self.evalExpr(math.MinInt8, math.MaxUint8)))
   130  	}
   131  }
   132  
   133  func (self *_Pseudo) encodeWord(m *[]byte) {
   134  	if m != nil {
   135  		append16(m, uint16(self.evalExpr(math.MinInt16, math.MaxUint16)))
   136  	}
   137  }
   138  
   139  func (self *_Pseudo) encodeLong(m *[]byte) {
   140  	if m != nil {
   141  		append32(m, uint32(self.evalExpr(math.MinInt32, math.MaxUint32)))
   142  	}
   143  }
   144  
   145  func (self *_Pseudo) encodeQuad(m *[]byte) {
   146  	if m != nil {
   147  		if v, err := self.expr.Evaluate(); err != nil {
   148  			panic(err)
   149  		} else {
   150  			append64(m, uint64(v))
   151  		}
   152  	}
   153  }
   154  
   155  func (self *_Pseudo) encodeAlign(m *[]byte, pc uintptr) {
   156  	if m != nil {
   157  		if self.expr == nil {
   158  			expandmm(m, self.alignSize(pc), 0)
   159  		} else {
   160  			expandmm(m, self.alignSize(pc), byte(self.evalExpr(math.MinInt8, math.MaxUint8)))
   161  		}
   162  	}
   163  }
   164  
// Operands represents the sequence of operands required by an instruction.
// It is a fixed-size array of _N_args slots; how many of them are actually
// in use is tracked separately (see Instruction.argc).
type Operands [_N_args]interface{}
   167  
   168  // InstructionDomain represents the domain of an instruction.
   169  type InstructionDomain uint8
   170  
   171  const (
   172  	DomainGeneric InstructionDomain = iota
   173  	DomainMMXSSE
   174  	DomainAVX
   175  	DomainFMA
   176  	DomainCrypto
   177  	DomainMask
   178  	DomainAMDSpecific
   179  	DomainMisc
   180  	DomainPseudo
   181  )
   182  
   183  type (
   184  	_BranchType uint8
   185  )
   186  
   187  const (
   188  	_B_none _BranchType = iota
   189  	_B_conditional
   190  	_B_unconditional
   191  )
   192  
// Instruction represents an unencoded instruction.
type Instruction struct {
	next   *Instruction        // next instruction in the owning Program's linked list
	pc     uintptr             // address of this instruction, assigned by Program.Assemble
	nb     int                 // encoded size in bytes (prefixes included), updated by encode
	len    int                 // number of entries of forms in use
	argc   int                 // number of entries of argv in use
	name   string              // instruction name, as returned by Name
	argv   Operands            // operands; only argv[:argc] are meaningful
	forms  [_N_forms]_Encoding // candidate encodings registered with add
	pseudo _Pseudo             // pseudo-instruction payload; pseudo.kind == 0 for real instructions
	branch _BranchType         // branch classification, _B_none for non-branches
	domain InstructionDomain   // instruction domain, as returned by Domain
	prefix []byte              // prefix bytes emitted before the instruction itself
}
   208  
   209  func (self *Instruction) add(flags int, encoder func(m *_Encoding, v []interface{})) {
   210  	self.forms[self.len].flags = flags
   211  	self.forms[self.len].encoder = encoder
   212  	self.len++
   213  }
   214  
   215  func (self *Instruction) free() {
   216  	self.clear()
   217  	self.pseudo.free()
   218  	//freeInstruction(self)
   219  }
   220  
   221  func (self *Instruction) clear() {
   222  	for i := 0; i < self.argc; i++ {
   223  		if v, ok := self.argv[i].(Disposable); ok {
   224  			v.Free()
   225  		}
   226  	}
   227  }
   228  
   229  func (self *Instruction) check(e *_Encoding) bool {
   230  	if (e.flags & _F_rel1) != 0 {
   231  		return isRel8(self.argv[0])
   232  	} else if (e.flags & _F_rel4) != 0 {
   233  		return isRel32(self.argv[0]) || isLabel(self.argv[0])
   234  	} else {
   235  		return true
   236  	}
   237  }
   238  
   239  func (self *Instruction) encode(m *[]byte) int {
   240  	n := math.MaxInt64
   241  	p := (*_Encoding)(nil)
   242  
   243  	/* encode prefixes if any */
   244  	if self.nb = len(self.prefix); m != nil {
   245  		*m = append(*m, self.prefix...)
   246  	}
   247  
   248  	/* check for pseudo-instructions */
   249  	if self.pseudo.kind != 0 {
   250  		self.nb += self.pseudo.encode(m, self.pc)
   251  		return self.nb
   252  	}
   253  
   254  	/* find the shortest encoding */
   255  	for i := 0; i < self.len; i++ {
   256  		if e := &self.forms[i]; self.check(e) {
   257  			if v := e.encode(self.argv[:self.argc]); v < n {
   258  				n = v
   259  				p = e
   260  			}
   261  		}
   262  	}
   263  
   264  	/* add to buffer if needed */
   265  	if m != nil {
   266  		*m = append(*m, p.bytes[:n]...)
   267  	}
   268  
   269  	/* update the instruction length */
   270  	self.nb += n
   271  	return self.nb
   272  }
   273  
   274  /** Instruction Prefixes **/
   275  
// Prefix bytes appended to Instruction.prefix by the CS / DS / ES / FS /
// GS / SS / LOCK methods.
const (
	_P_cs   = 0x2e // CS segment override
	_P_ds   = 0x3e // DS segment override
	_P_es   = 0x26 // ES segment override
	_P_fs   = 0x64 // FS segment override
	_P_gs   = 0x65 // GS segment override
	_P_ss   = 0x36 // SS segment override
	_P_lock = 0xf0 // LOCK prefix
)
   285  
   286  // CS overrides the memory operation of this instruction to CS.
   287  func (self *Instruction) CS() *Instruction {
   288  	self.prefix = append(self.prefix, _P_cs)
   289  	return self
   290  }
   291  
   292  // DS overrides the memory operation of this instruction to DS,
   293  // this is the default section for most instructions if not specified.
   294  func (self *Instruction) DS() *Instruction {
   295  	self.prefix = append(self.prefix, _P_ds)
   296  	return self
   297  }
   298  
   299  // ES overrides the memory operation of this instruction to ES.
   300  func (self *Instruction) ES() *Instruction {
   301  	self.prefix = append(self.prefix, _P_es)
   302  	return self
   303  }
   304  
   305  // FS overrides the memory operation of this instruction to FS.
   306  func (self *Instruction) FS() *Instruction {
   307  	self.prefix = append(self.prefix, _P_fs)
   308  	return self
   309  }
   310  
   311  // GS overrides the memory operation of this instruction to GS.
   312  func (self *Instruction) GS() *Instruction {
   313  	self.prefix = append(self.prefix, _P_gs)
   314  	return self
   315  }
   316  
   317  // SS overrides the memory operation of this instruction to SS.
   318  func (self *Instruction) SS() *Instruction {
   319  	self.prefix = append(self.prefix, _P_ss)
   320  	return self
   321  }
   322  
   323  // LOCK causes the processor's LOCK# signal to be asserted during execution of
   324  // the accompanying instruction (turns the instruction into an atomic instruction).
   325  // In a multiprocessor environment, the LOCK# signal insures that the processor
   326  // has exclusive use of any shared memory while the signal is asserted.
   327  func (self *Instruction) LOCK() *Instruction {
   328  	self.prefix = append(self.prefix, _P_lock)
   329  	return self
   330  }
   331  
   332  /** Basic Instruction Properties **/
   333  
   334  // Name returns the instruction name.
   335  func (self *Instruction) Name() string {
   336  	return self.name
   337  }
   338  
   339  // Domain returns the domain of this instruction.
   340  func (self *Instruction) Domain() InstructionDomain {
   341  	return self.domain
   342  }
   343  
   344  // Operands returns the operands of this instruction.
   345  func (self *Instruction) Operands() []interface{} {
   346  	return self.argv[:self.argc]
   347  }
   348  
// Program represents a sequence of instructions, kept as a singly linked
// list in program order.
type Program struct {
	arch *Arch        // target architecture, consulted by require for ISA availability
	head *Instruction // first instruction, nil while the program is empty
	tail *Instruction // last instruction, new instructions are attached after it
}
   355  
// Encoded sizes (in bytes) of branch instructions, used by the assembler
// when sizing labeled branches.
const (
	_N_near       = 2 // near-branch (-128 ~ +127) takes 2 bytes to encode
	_N_far_cond   = 6 // conditional far-branch takes 6 bytes to encode
	_N_far_uncond = 5 // unconditional far-branch takes 5 bytes to encode
)
   361  
   362  func (self *Program) clear() {
   363  	for p, q := self.head, self.head; p != nil; p = q {
   364  		q = p.next
   365  		p.free()
   366  	}
   367  }
   368  
   369  func (self *Program) alloc(name string, argc int, argv Operands) *Instruction {
   370  	p := self.tail
   371  	q := newInstruction(name, argc, argv)
   372  
   373  	/* attach to tail if any */
   374  	if p != nil {
   375  		p.next = q
   376  	} else {
   377  		self.head = q
   378  	}
   379  
   380  	/* set the new tail */
   381  	self.tail = q
   382  	return q
   383  }
   384  
   385  func (self *Program) pseudo(kind _PseudoType) (p *Instruction) {
   386  	p = self.alloc(kind.String(), 0, Operands{})
   387  	p.domain = DomainPseudo
   388  	p.pseudo.kind = kind
   389  	return
   390  }
   391  
   392  func (self *Program) require(isa ISA) {
   393  	if !self.arch.HasISA(isa) {
   394  		panic("ISA '" + isa.String() + "' was not enabled")
   395  	}
   396  }
   397  
   398  func (self *Program) branchSize(p *Instruction) int {
   399  	switch p.branch {
   400  	case _B_none:
   401  		panic("p is not a branch")
   402  	case _B_conditional:
   403  		return _N_far_cond
   404  	case _B_unconditional:
   405  		return _N_far_uncond
   406  	default:
   407  		panic("invalid instruction")
   408  	}
   409  }
   410  
   411  /** Pseudo-Instructions **/
   412  
   413  // Byte is a pseudo-instruction to add raw byte to the assembled code.
   414  func (self *Program) Byte(v *expr.Expr) (p *Instruction) {
   415  	p = self.pseudo(_PseudoByte)
   416  	p.pseudo.expr = v
   417  	return
   418  }
   419  
   420  // Word is a pseudo-instruction to add raw uint16 as little-endian to the assembled code.
   421  func (self *Program) Word(v *expr.Expr) (p *Instruction) {
   422  	p = self.pseudo(_PseudoWord)
   423  	p.pseudo.expr = v
   424  	return
   425  }
   426  
   427  // Long is a pseudo-instruction to add raw uint32 as little-endian to the assembled code.
   428  func (self *Program) Long(v *expr.Expr) (p *Instruction) {
   429  	p = self.pseudo(_PseudoLong)
   430  	p.pseudo.expr = v
   431  	return
   432  }
   433  
   434  // Quad is a pseudo-instruction to add raw uint64 as little-endian to the assembled code.
   435  func (self *Program) Quad(v *expr.Expr) (p *Instruction) {
   436  	p = self.pseudo(_PseudoQuad)
   437  	p.pseudo.expr = v
   438  	return
   439  }
   440  
   441  // Data is a pseudo-instruction to add raw bytes to the assembled code.
   442  func (self *Program) Data(v []byte) (p *Instruction) {
   443  	p = self.pseudo(_PseudoData)
   444  	p.pseudo.data = v
   445  	return
   446  }
   447  
   448  // Align is a pseudo-instruction to ensure the PC is aligned to a certain value.
   449  func (self *Program) Align(align uint64, padding *expr.Expr) (p *Instruction) {
   450  	p = self.pseudo(_PseudoAlign)
   451  	p.pseudo.uint = align
   452  	p.pseudo.expr = padding
   453  	return
   454  }
   455  
   456  /** Program Assembler **/
   457  
   458  // Free returns the Program object into pool.
   459  // Any operation performed after Free is undefined behavior.
   460  //
   461  // NOTE: This also frees all the instructions, labels, memory
   462  //
   463  //	operands and expressions associated with this program.
   464  func (self *Program) Free() {
   465  	self.clear()
   466  	//freeProgram(self)
   467  }
   468  
   469  // Link pins a label at the current position.
   470  func (self *Program) Link(p *Label) {
   471  	if p.Dest != nil {
   472  		panic("lable was alreay linked")
   473  	} else {
   474  		p.Dest = self.pseudo(_PseudoNop)
   475  	}
   476  }
   477  
   478  // Assemble assembles and links the entire program into machine code.
   479  func (self *Program) Assemble(pc uintptr) (ret []byte) {
   480  	orig := pc
   481  	next := true
   482  	offs := uintptr(0)
   483  
   484  	/* Pass 0: PC-precompute, assume all labeled branches are far-branches. */
   485  	for p := self.head; p != nil; p = p.next {
   486  		if p.pc = pc; !isLabel(p.argv[0]) || p.branch == _B_none {
   487  			pc += uintptr(p.encode(nil))
   488  		} else {
   489  			pc += uintptr(self.branchSize(p))
   490  		}
   491  	}
   492  
   493  	/* allocate space for the machine code */
   494  	nb := int(pc - orig)
   495  	ret = make([]byte, 0, nb)
   496  
   497  	/* Pass 1: adjust all the jumps */
   498  	for next {
   499  		next = false
   500  		offs = uintptr(0)
   501  
   502  		/* scan all the branches */
   503  		for p := self.head; p != nil; p = p.next {
   504  			var ok bool
   505  			var lb *Label
   506  
   507  			/* re-calculate the alignment here */
   508  			if nb = p.nb; p.pseudo.kind == _PseudoAlign {
   509  				p.pc -= offs
   510  				offs += uintptr(nb - p.encode(nil))
   511  				continue
   512  			}
   513  
   514  			/* adjust the program counter */
   515  			p.pc -= offs
   516  			lb, ok = p.argv[0].(*Label)
   517  
   518  			/* only care about labeled far-branches */
   519  			if !ok || p.nb == _N_near || p.branch == _B_none {
   520  				continue
   521  			}
   522  
   523  			/* calculate the jump offset */
   524  			size := self.branchSize(p)
   525  			diff := lb.offset(p.pc, size)
   526  
   527  			/* too far to be a near jump */
   528  			if diff > 127 || diff < -128 {
   529  				p.nb = size
   530  				continue
   531  			}
   532  
   533  			/* a far jump becomes a near jump, calculate
   534  			 * the PC adjustment value and assemble again */
   535  			next = true
   536  			p.nb = _N_near
   537  			offs += uintptr(size - _N_near)
   538  		}
   539  	}
   540  
   541  	/* Pass 3: link all the cross-references */
   542  	for p := self.head; p != nil; p = p.next {
   543  		for i := 0; i < p.argc; i++ {
   544  			var ok bool
   545  			var lb *Label
   546  			var op *MemoryOperand
   547  
   548  			/* resolve labels */
   549  			if lb, ok = p.argv[i].(*Label); ok {
   550  				p.argv[i] = lb.offset(p.pc, p.nb)
   551  				continue
   552  			}
   553  
   554  			/* check for memory operands */
   555  			if op, ok = p.argv[i].(*MemoryOperand); !ok {
   556  				continue
   557  			}
   558  
   559  			/* check for label references */
   560  			if op.Addr.Type != Reference {
   561  				continue
   562  			}
   563  
   564  			/* replace the label with the real offset */
   565  			op.Addr.Type = Offset
   566  			op.Addr.Offset = op.Addr.Reference.offset(p.pc, p.nb)
   567  		}
   568  	}
   569  
   570  	/* Pass 4: actually encode all the instructions */
   571  	for p := self.head; p != nil; p = p.next {
   572  		p.encode(&ret)
   573  	}
   574  
   575  	/* all done */
   576  	return ret
   577  }
   578  
   579  // AssembleAndFree is like Assemble, but it frees the Program after assembling.
   580  func (self *Program) AssembleAndFree(pc uintptr) (ret []byte) {
   581  	ret = self.Assemble(pc)
   582  	self.Free()
   583  	return
   584  }