github.com/cloudwego/iasm@v0.2.0/x86_64/assembler.go (about)

     1  //
     2  // Copyright 2024 CloudWeGo Authors
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  //
    16  
    17  package x86_64
    18  
    19  import (
    20      `bytes`
    21      `errors`
    22      `fmt`
    23      `math`
    24      `strconv`
    25      `strings`
    26      `unicode`
    27  
    28      `github.com/cloudwego/iasm/expr`
    29  )
    30  
    31  type (
    32      _TokenKind   int
    33      _Punctuation int
    34  )
    35  
    36  const (
    37      _T_end _TokenKind = iota + 1
    38      _T_int
    39      _T_name
    40      _T_punc
    41      _T_space
    42  )
    43  
    44  const (
    45      _P_plus _Punctuation = iota + 1
    46      _P_minus
    47      _P_star
    48      _P_slash
    49      _P_percent
    50      _P_amp
    51      _P_bar
    52      _P_caret
    53      _P_shl
    54      _P_shr
    55      _P_tilde
    56      _P_lbrk
    57      _P_rbrk
    58      _P_dot
    59      _P_comma
    60      _P_colon
    61      _P_dollar
    62      _P_hash
    63  )
    64  
    65  var _PUNC_NAME = map[_Punctuation]string {
    66      _P_plus    : "+",
    67      _P_minus   : "-",
    68      _P_star    : "*",
    69      _P_slash   : "/",
    70      _P_percent : "%",
    71      _P_amp     : "&",
    72      _P_bar     : "|",
    73      _P_caret   : "^",
    74      _P_shl     : "<<",
    75      _P_shr     : ">>",
    76      _P_tilde   : "~",
    77      _P_lbrk    : "(",
    78      _P_rbrk    : ")",
    79      _P_dot     : ".",
    80      _P_comma   : ",",
    81      _P_colon   : ":",
    82      _P_dollar  : "$",
    83      _P_hash    : "#",
    84  }
    85  
    86  func (self _Punctuation) String() string {
    87      if v, ok := _PUNC_NAME[self]; ok {
    88          return v
    89      } else {
    90          return fmt.Sprintf("_Punctuation(%d)", self)
    91      }
    92  }
    93  
    94  type _Token struct {
    95      pos int
    96      end int
    97      u64 uint64
    98      str string
    99      tag _TokenKind
   100  }
   101  
   102  func (self *_Token) punc() _Punctuation {
   103      return _Punctuation(self.u64)
   104  }
   105  
   106  func (self *_Token) String() string {
   107      switch self.tag {
   108          case _T_end   : return "<END>"
   109          case _T_int   : return fmt.Sprintf("<INT %d>", self.u64)
   110          case _T_punc  : return fmt.Sprintf("<PUNC %s>", _Punctuation(self.u64))
   111          case _T_name  : return fmt.Sprintf("<NAME %s>", strconv.QuoteToASCII(self.str))
   112          case _T_space : return "<SPACE>"
   113          default       : return fmt.Sprintf("<UNK:%d %d %s>", self.tag, self.u64, strconv.QuoteToASCII(self.str))
   114      }
   115  }
   116  
   117  func tokenEnd(p int, end int) _Token {
   118      return _Token {
   119          pos: p,
   120          end: end,
   121          tag: _T_end,
   122      }
   123  }
   124  
   125  func tokenInt(p int, val uint64) _Token {
   126      return _Token {
   127          pos: p,
   128          u64: val,
   129          tag: _T_int,
   130      }
   131  }
   132  
   133  func tokenName(p int, name string) _Token {
   134      return _Token {
   135          pos: p,
   136          str: name,
   137          tag: _T_name,
   138      }
   139  }
   140  
   141  func tokenPunc(p int, punc _Punctuation) _Token {
   142      return _Token {
   143          pos: p,
   144          tag: _T_punc,
   145          u64: uint64(punc),
   146      }
   147  }
   148  
   149  func tokenSpace(p int, end int) _Token {
   150      return _Token {
   151          pos: p,
   152          end: end,
   153          tag: _T_space,
   154      }
   155  }
   156  
   157  // SyntaxError represents an error in the assembly syntax.
   158  type SyntaxError struct {
   159      Pos    int
   160      Row    int
   161      Src    []rune
   162      Reason string
   163  }
   164  
   165  // Error implements the error interface.
   166  func (self *SyntaxError) Error() string {
   167      if self.Pos < 0 {
   168          return fmt.Sprintf("%s at line %d", self.Reason, self.Row)
   169      } else {
   170          return fmt.Sprintf("%s at %d:%d", self.Reason, self.Row, self.Pos + 1)
   171      }
   172  }
   173  
   174  type _Tokenizer struct {
   175      pos int
   176      row int
   177      src []rune
   178  }
   179  
   180  func (self *_Tokenizer) ch() rune {
   181      return self.src[self.pos]
   182  }
   183  
   184  func (self *_Tokenizer) eof() bool {
   185      return self.pos >= len(self.src)
   186  }
   187  
   188  func (self *_Tokenizer) rch() (ret rune) {
   189      ret, self.pos = self.src[self.pos], self.pos + 1
   190      return
   191  }
   192  
   193  func (self *_Tokenizer) err(pos int, msg string) *SyntaxError {
   194      return &SyntaxError {
   195          Pos    : pos,
   196          Row    : self.row,
   197          Src    : self.src,
   198          Reason : msg,
   199      }
   200  }
   201  
   202  type _TrimState int
   203  
   204  const (
   205      _TS_normal _TrimState = iota
   206      _TS_slcomm
   207      _TS_hscomm
   208      _TS_string
   209      _TS_escape
   210      _TS_accept
   211      _TS_nolast
   212  )
   213  
   214  func (self *_Tokenizer) init(src string) {
   215      var i int
   216      var ch rune
   217      var st _TrimState
   218  
   219      /* set the source */
   220      self.pos = 0
   221      self.src = []rune(src)
   222  
   223      /* remove commends, including "//" and "##" */
   224      loop: for i, ch = range self.src {
   225          switch {
   226              case st == _TS_normal && ch == '/'  : st = _TS_slcomm
   227              case st == _TS_normal && ch == '"'  : st = _TS_string
   228              case st == _TS_normal && ch == ';'  : st = _TS_accept; break loop
   229              case st == _TS_normal && ch == '#'  : st = _TS_hscomm
   230              case st == _TS_slcomm && ch == '/'  : st = _TS_nolast; break loop
   231              case st == _TS_slcomm               : st = _TS_normal
   232              case st == _TS_hscomm && ch == '#'  : st = _TS_nolast; break loop
   233              case st == _TS_hscomm               : st = _TS_normal
   234              case st == _TS_string && ch == '"'  : st = _TS_normal
   235              case st == _TS_string && ch == '\\' : st = _TS_escape
   236              case st == _TS_escape               : st = _TS_string
   237          }
   238      }
   239  
   240      /* check for errors */
   241      switch st {
   242          case _TS_accept: self.src = self.src[:i]
   243          case _TS_nolast: self.src = self.src[:i - 1]
   244          case _TS_string: panic(self.err(i, "string is not terminated"))
   245          case _TS_escape: panic(self.err(i, "escape sequence is not terminated"))
   246      }
   247  }
   248  
   249  func (self *_Tokenizer) skip(check func(v rune) bool) {
   250      for !self.eof() && check(self.ch()) {
   251          self.pos++
   252      }
   253  }
   254  
   255  func (self *_Tokenizer) find(pos int, check func(v rune) bool) string {
   256      self.skip(check)
   257      return string(self.src[pos:self.pos])
   258  }
   259  
   260  func (self *_Tokenizer) chrv(p int) _Token {
   261      var err error
   262      var val uint64
   263  
   264      /* starting and ending position */
   265      p0 := p + 1
   266      p1 := p0 + 1
   267  
   268      /* find the end of the literal */
   269      for p1 < len(self.src) && self.src[p1] != '\'' {
   270          if p1++; self.src[p1 - 1] == '\\' {
   271              p1++
   272          }
   273      }
   274  
   275      /* empty literal */
   276      if p1 == p0 {
   277          panic(self.err(p1, "empty character constant"))
   278      }
   279  
   280      /* check for EOF */
   281      if p1 == len(self.src) {
   282          panic(self.err(p1, "unexpected EOF when scanning literals"))
   283      }
   284  
   285      /* parse the literal */
   286      if val, err = literal64(string(self.src[p0:p1])); err != nil {
   287          panic(self.err(p0, "cannot parse literal: " + err.Error()))
   288      }
   289  
   290      /* skip the closing '\'' */
   291      self.pos = p1 + 1
   292      return tokenInt(p, val)
   293  }
   294  
   295  func (self *_Tokenizer) numv(p int) _Token {
   296      if val, err := strconv.ParseUint(self.find(p, isnumber), 0, 64); err != nil {
   297          panic(self.err(p, "invalid immediate value: " + err.Error()))
   298      } else {
   299          return tokenInt(p, val)
   300      }
   301  }
   302  
   303  func (self *_Tokenizer) defv(p int, cc rune) _Token {
   304      if isdigit(cc) {
   305          return self.numv(p)
   306      } else if isident0(cc) {
   307          return tokenName(p, self.find(p, isident))
   308      } else {
   309          panic(self.err(p, "invalid char: " + strconv.QuoteRune(cc)))
   310      }
   311  }
   312  
   313  func (self *_Tokenizer) rep2(p int, pp _Punctuation, cc rune) _Token {
   314      if self.eof() {
   315          panic(self.err(self.pos, "unexpected EOF when scanning operators"))
   316      } else if c := self.rch(); c != cc {
   317          panic(self.err(p + 1, strconv.QuoteRune(cc) + " expected, got " + strconv.QuoteRune(c)))
   318      } else {
   319          return tokenPunc(p, pp)
   320      }
   321  }
   322  
   323  func (self *_Tokenizer) read() _Token {
   324      var p int
   325      var c rune
   326      var t _Token
   327  
   328      /* check for EOF */
   329      if self.eof() {
   330          return tokenEnd(self.pos, self.pos)
   331      }
   332  
   333      /* skip spaces as needed */
   334      if p = self.pos; unicode.IsSpace(self.src[p]) {
   335          self.skip(unicode.IsSpace)
   336          return tokenSpace(p, self.pos)
   337      }
   338  
   339      /* check for line comments */
   340      if p = self.pos; p < len(self.src) - 1 && self.src[p] == '/' && self.src[p + 1] == '/' {
   341          self.pos = len(self.src)
   342          return tokenEnd(p, self.pos)
   343      }
   344  
   345      /* read the next character */
   346      p = self.pos
   347      c = self.rch()
   348  
   349      /* parse the next character */
   350      switch c {
   351          case '+'  : t = tokenPunc(p, _P_plus)
   352          case '-'  : t = tokenPunc(p, _P_minus)
   353          case '*'  : t = tokenPunc(p, _P_star)
   354          case '/'  : t = tokenPunc(p, _P_slash)
   355          case '%'  : t = tokenPunc(p, _P_percent)
   356          case '&'  : t = tokenPunc(p, _P_amp)
   357          case '|'  : t = tokenPunc(p, _P_bar)
   358          case '^'  : t = tokenPunc(p, _P_caret)
   359          case '<'  : t = self.rep2(p, _P_shl, '<')
   360          case '>'  : t = self.rep2(p, _P_shr, '>')
   361          case '~'  : t = tokenPunc(p, _P_tilde)
   362          case '('  : t = tokenPunc(p, _P_lbrk)
   363          case ')'  : t = tokenPunc(p, _P_rbrk)
   364          case '.'  : t = tokenPunc(p, _P_dot)
   365          case ','  : t = tokenPunc(p, _P_comma)
   366          case ':'  : t = tokenPunc(p, _P_colon)
   367          case '$'  : t = tokenPunc(p, _P_dollar)
   368          case '#'  : t = tokenPunc(p, _P_hash)
   369          case '\'' : t = self.chrv(p)
   370          default   : t = self.defv(p, c)
   371      }
   372  
   373      /* mark the end of token */
   374      t.end = self.pos
   375      return t
   376  }
   377  
   378  func (self *_Tokenizer) next() (tk _Token) {
   379      for {
   380          if tk = self.read(); tk.tag != _T_space {
   381              return
   382          }
   383      }
   384  }
   385  
   386  // LabelKind indicates the type of label reference.
   387  type LabelKind int
   388  
   389  // OperandKind indicates the type of the operand.
   390  type OperandKind int
   391  
   392  // InstructionPrefix indicates the prefix bytes prepended to the instruction.
   393  type InstructionPrefix byte
   394  
   395  const (
   396      // OpImm means the operand is an immediate value.
   397      OpImm OperandKind = 1 << iota
   398  
   399      // OpReg means the operand is a register.
   400      OpReg
   401  
   402      // OpMem means the operand is a memory address.
   403      OpMem
   404  
   405      // OpLabel means the operand is a label, specifically for
   406      // branch instructions.
   407      OpLabel
   408  )
   409  
   410  const (
   411      // Declaration means the label is a declaration.
   412      Declaration LabelKind = iota + 1
   413  
   414      // BranchTarget means the label should be treated as a branch target.
   415      BranchTarget
   416  
   417      // RelativeAddress means the label should be treated as a reference to
   418      // the code section (e.g. RIP-relative addressing).
   419      RelativeAddress
   420  )
   421  
   422  const (
   423      // PrefixLock causes the processor's LOCK# signal to be asserted during execution of
   424      // the accompanying instruction (turns the instruction into an atomic instruction).
   425      // In a multiprocessor environment, the LOCK# signal insures that the processor
   426      // has exclusive use of any shared memory while the signal is asserted.
   427      PrefixLock InstructionPrefix = iota
   428  
   429      // PrefixSegmentCS overrides the memory operation of this instruction to CS (Code Segment).
   430      PrefixSegmentCS
   431  
   432      // PrefixSegmentDS overrides the memory operation of this instruction to DS (Data Segment),
   433      // this is the default section for most instructions if not specified.
   434      PrefixSegmentDS
   435  
   436      // PrefixSegmentES overrides the memory operation of this instruction to ES (Extra Segment).
   437      PrefixSegmentES
   438  
   439      // PrefixSegmentFS overrides the memory operation of this instruction to FS.
   440      PrefixSegmentFS
   441  
   442      // PrefixSegmentGS overrides the memory operation of this instruction to GS.
   443      PrefixSegmentGS
   444  
   445      // PrefixSegmentSS overrides the memory operation of this instruction to SS (Stack Segment).
   446      PrefixSegmentSS
   447  )
   448  
   449  // ParsedLabel represents a label in the source, either a jump target or
   450  // an RIP-relative addressing.
   451  type ParsedLabel struct {
   452      Name string
   453      Kind LabelKind
   454  }
   455  
   456  // ParsedOperand represents an operand of an instruction in the source.
   457  type ParsedOperand struct {
   458      Op     OperandKind
   459      Imm    int64
   460      Reg    Register
   461      Label  ParsedLabel
   462      Memory MemoryAddress
   463  }
   464  
   465  // ParsedInstruction represents an instruction in the source.
   466  type ParsedInstruction struct {
   467      Mnemonic string
   468      Operands []ParsedOperand
   469      Prefixes []InstructionPrefix
   470  }
   471  
   472  func (self *ParsedInstruction) imm(v int64) {
   473      self.Operands = append(self.Operands, ParsedOperand {
   474          Op  : OpImm,
   475          Imm : v,
   476      })
   477  }
   478  
   479  func (self *ParsedInstruction) reg(v Register) {
   480      self.Operands = append(self.Operands, ParsedOperand {
   481          Op  : OpReg,
   482          Reg : v,
   483      })
   484  }
   485  
   486  func (self *ParsedInstruction) mem(v MemoryAddress) {
   487      self.Operands = append(self.Operands, ParsedOperand {
   488          Op     : OpMem,
   489          Memory : v,
   490      })
   491  }
   492  
   493  func (self *ParsedInstruction) target(v string) {
   494      self.Operands = append(self.Operands, ParsedOperand {
   495          Op    : OpLabel,
   496          Label : ParsedLabel {
   497              Name: v,
   498              Kind: BranchTarget,
   499          },
   500      })
   501  }
   502  
   503  func (self *ParsedInstruction) reference(v string) {
   504      self.Operands = append(self.Operands, ParsedOperand {
   505          Op    : OpLabel,
   506          Label : ParsedLabel {
   507              Name: v,
   508              Kind: RelativeAddress,
   509          },
   510      })
   511  }
   512  
   513  // LineKind indicates the type of ParsedLine.
   514  type LineKind int
   515  
   516  const (
   517      // LineLabel means the ParsedLine is a label.
   518      LineLabel LineKind = iota + 1
   519  
   520      // LineInstr means the ParsedLine is an instruction.
   521      LineInstr
   522  
   523      // LineCommand means the ParsedLine is a ParsedCommand.
   524      LineCommand
   525  )
   526  
   527  // ParsedLine represents a parsed source line.
   528  type ParsedLine struct {
   529      Row         int
   530      Src         []rune
   531      Kind        LineKind
   532      Label       ParsedLabel
   533      Command     ParsedCommand
   534      Instruction ParsedInstruction
   535  }
   536  
   537  // ParsedCommand represents a parsed assembly directive command.
   538  type ParsedCommand struct {
   539      Cmd  string
   540      Args []ParsedCommandArg
   541  }
   542  
   543  // ParsedCommandArg represents an argument of a ParsedCommand.
   544  type ParsedCommandArg struct {
   545      Value    string
   546      IsString bool
   547  }
   548  
   549  // Parser parses the source, and generates a sequence of ParsedInstruction's.
   550  type Parser struct {
   551      lex _Tokenizer
   552      exp expr.Parser
   553  }
   554  
   555  const (
   556      rip Register64 = 0xff
   557  )
   558  
   559  var _RegBranch = map[string]bool {
   560      "jmp"   : true,
   561      "jmpq"  : true,
   562      "call"  : true,
   563      "callq" : true,
   564  }
   565  
   566  var _SegPrefix = map[string]InstructionPrefix {
   567      "cs": PrefixSegmentCS,
   568      "ds": PrefixSegmentDS,
   569      "es": PrefixSegmentES,
   570      "fs": PrefixSegmentFS,
   571      "gs": PrefixSegmentGS,
   572      "ss": PrefixSegmentSS,
   573  }
   574  
   575  func (self *Parser) i32(tk _Token, v int64) int32 {
   576      if v >= math.MinInt32 && v <= math.MaxUint32 {
   577          return int32(v)
   578      } else {
   579          panic(self.err(tk.pos, fmt.Sprintf("32-bit integer out ouf range: %d", v)))
   580      }
   581  }
   582  
   583  func (self *Parser) err(pos int, msg string) *SyntaxError {
   584      return &SyntaxError {
   585          Pos    : pos,
   586          Row    : self.lex.row,
   587          Src    : self.lex.src,
   588          Reason : msg,
   589      }
   590  }
   591  
   592  func (self *Parser) negv() int64 {
   593      tk := self.lex.read()
   594      tt := tk.tag
   595  
   596      /* must be an integer */
   597      if tt != _T_int {
   598          panic(self.err(tk.pos, "integer expected after '-'"))
   599      } else {
   600          return -int64(tk.u64)
   601      }
   602  }
   603  
   604  func (self *Parser) eval(p int) (r int64) {
   605      var e error
   606      var v *expr.Expr
   607  
   608      /* searching start */
   609      n := 1
   610      q := p + 1
   611  
   612      /* find the end of expression */
   613      for n > 0 && q < len(self.lex.src) {
   614          switch self.lex.src[q] {
   615              case '(' : q++; n++
   616              case ')' : q++; n--
   617              default  : q++
   618          }
   619      }
   620  
   621      /* check for EOF */
   622      if n != 0 {
   623          panic(self.err(q, "unexpected EOF when parsing expressions"))
   624      }
   625  
   626      /* evaluate the expression */
   627      if v, e = self.exp.SetSource(string(self.lex.src[p:q - 1])).Parse(nil); e != nil {
   628          panic(self.err(p, "cannot evaluate expression: " + e.Error()))
   629      }
   630  
   631      /* evaluate the expression */
   632      if r, e = v.Evaluate(); e != nil {
   633          panic(self.err(p, "cannot evaluate expression: " + e.Error()))
   634      }
   635  
   636      /* skip the last ')' */
   637      v.Free()
   638      self.lex.pos = q
   639      return
   640  }
   641  
   642  func (self *Parser) relx(tk _Token) {
   643      if tk.tag != _T_punc || tk.punc() != _P_lbrk {
   644          panic(self.err(tk.pos, "'(' expected for RIP-relative addressing"))
   645      } else if tk = self.lex.next(); self.regx(tk) != rip {
   646          panic(self.err(tk.pos, "RIP-relative addressing expects %rip as the base register"))
   647      } else if tk = self.lex.next(); tk.tag != _T_punc || tk.punc() != _P_rbrk {
   648          panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling"))
   649      }
   650  }
   651  
   652  func (self *Parser) immx(tk _Token) int64 {
   653      if tk.tag != _T_punc || tk.punc() != _P_dollar {
   654          panic(self.err(tk.pos, "'$' expected for registers"))
   655      } else if tk = self.lex.read(); tk.tag == _T_int {
   656          return int64(tk.u64)
   657      } else if tk.tag == _T_punc && tk.punc() == _P_lbrk {
   658          return self.eval(self.lex.pos)
   659      } else if tk.tag == _T_punc && tk.punc() == _P_minus {
   660          return self.negv()
   661      } else {
   662          panic(self.err(tk.pos, "immediate value expected"))
   663      }
   664  }
   665  
   666  func (self *Parser) regx(tk _Token) Register {
   667      if tk.tag != _T_punc || tk.punc() != _P_percent {
   668          panic(self.err(tk.pos, "'%' expected for registers"))
   669      } else if tk = self.lex.read(); tk.tag != _T_name {
   670          panic(self.err(tk.pos, "register name expected"))
   671      } else if tk.str == "rip" {
   672          return rip
   673      } else if reg, ok := Registers[tk.str]; ok {
   674          return reg
   675      } else {
   676          panic(self.err(tk.pos, "invalid register name: " + strconv.Quote(tk.str)))
   677      }
   678  }
   679  
   680  func (self *Parser) regv(tk _Token) Register {
   681      if reg := self.regx(tk); reg == rip {
   682          panic(self.err(tk.pos, "%rip is not accessable as a dedicated register"))
   683      } else {
   684          return reg
   685      }
   686  }
   687  
   688  func (self *Parser) disp(vv int32) MemoryAddress {
   689      switch tk := self.lex.next(); tk.tag {
   690          case _T_end  : return MemoryAddress { Displacement: vv }
   691          case _T_punc : return self.relm(tk, vv)
   692          default      : panic(self.err(tk.pos, "',' or '(' expected"))
   693      }
   694  }
   695  
   696  func (self *Parser) relm(tv _Token, disp int32) MemoryAddress {
   697      var tk _Token
   698      var tt _TokenKind
   699  
   700      /* check for absolute addressing */
   701      if tv.punc() == _P_comma {
   702          self.lex.pos--
   703          return MemoryAddress { Displacement: disp }
   704      }
   705  
   706      /* must be '(' now */
   707      if tv.punc() != _P_lbrk {
   708          panic(self.err(tv.pos, "',' or '(' expected"))
   709      }
   710  
   711      /* read the next token */
   712      tk = self.lex.next()
   713      tt = tk.tag
   714  
   715      /* must be a punctuation */
   716      if tt != _T_punc {
   717          panic(self.err(tk.pos, "'%' or ',' expected"))
   718      }
   719  
   720      /* check for base */
   721      switch tk.punc() {
   722          case _P_percent : return self.base(tk, disp)
   723          case _P_comma   : return self.index(nil, disp)
   724          default         : panic(self.err(tk.pos, "'%' or ',' expected"))
   725      }
   726  }
   727  
   728  func (self *Parser) base(tk _Token, disp int32) MemoryAddress {
   729      rr := self.regx(tk)
   730      nk := self.lex.next()
   731  
   732      /* check for register indirection or base-index addressing */
   733      if !isReg64(rr) {
   734          panic(self.err(tk.pos, "not a valid base register"))
   735      } else if nk.tag != _T_punc {
   736          panic(self.err(nk.pos, "',' or ')' expected"))
   737      } else if nk.punc() == _P_comma {
   738          return self.index(rr, disp)
   739      } else if nk.punc() == _P_rbrk {
   740          return MemoryAddress { Base: rr, Displacement: disp }
   741      } else {
   742          panic(self.err(nk.pos, "',' or ')' expected"))
   743      }
   744  }
   745  
   746  func (self *Parser) index(base Register, disp int32) MemoryAddress {
   747      tk := self.lex.next()
   748      rr := self.regx(tk)
   749      nk := self.lex.next()
   750  
   751      /* check for scaled indexing */
   752      if base == rip {
   753          panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling"))
   754      } else if !isIndexable(rr) {
   755          panic(self.err(tk.pos, "not a valid index register"))
   756      } else if nk.tag != _T_punc {
   757          panic(self.err(nk.pos, "',' or ')' expected"))
   758      } else if nk.punc() == _P_comma {
   759          return self.scale(base, rr, disp)
   760      } else if nk.punc() == _P_rbrk {
   761          return MemoryAddress { Base: base, Index: rr, Scale: 1, Displacement: disp }
   762      } else {
   763          panic(self.err(nk.pos, "',' or ')' expected"))
   764      }
   765  }
   766  
   767  func (self *Parser) scale(base Register, index Register, disp int32) MemoryAddress {
   768      tk := self.lex.next()
   769      tt := tk.tag
   770      tv := tk.u64
   771  
   772      /* must be an integer */
   773      if tt != _T_int {
   774          panic(self.err(tk.pos, "integer expected"))
   775      }
   776  
   777      /* scale can only be 1, 2, 4 or 8 */
   778      if tv == 0 || (_Scales & (1 << tv)) == 0 {
   779          panic(self.err(tk.pos, "scale can only be 1, 2, 4 or 8"))
   780      }
   781  
   782      /* read next token */
   783      tk = self.lex.next()
   784      tt = tk.tag
   785  
   786      /* check for the closing ')' */
   787      if tt != _T_punc || tk.punc() != _P_rbrk {
   788          panic(self.err(tk.pos, "')' expected"))
   789      }
   790  
   791      /* construct the memory address */
   792      return MemoryAddress {
   793          Base         : base,
   794          Index        : index,
   795          Scale        : uint8(tv),
   796          Displacement : disp,
   797      }
   798  }
   799  
   800  func (self *Parser) cmds() *ParsedLine {
   801      cmd := ""
   802      pos := self.lex.pos
   803      buf := []ParsedCommandArg(nil)
   804  
   805      /* find the end of command */
   806      for p := pos; pos < len(self.lex.src); pos++ {
   807          if unicode.IsSpace(self.lex.src[pos]) {
   808              cmd = string(self.lex.src[p:pos])
   809              break
   810          }
   811      }
   812  
   813      /* parse the arguments */
   814      loop: for {
   815          switch self.next(&pos) {
   816              case 0   : break loop
   817              case '#' : break loop
   818              case '"' : pos = self.strings(&buf, pos)
   819              default  : pos = self.expressions(&buf, pos)
   820          }
   821      }
   822  
   823      /* construct the line */
   824      return &ParsedLine {
   825          Row     : self.lex.row,
   826          Src     : self.lex.src,
   827          Kind    : LineCommand,
   828          Command : ParsedCommand {
   829              Cmd  : cmd,
   830              Args : buf,
   831          },
   832      }
   833  }
   834  
   835  func (self *Parser) feed(line string) *ParsedLine {
   836      ff := true
   837      rr := false
   838      lk := false
   839  
   840      /* reset the lexer */
   841      self.lex.row++
   842      self.lex.init(line)
   843  
   844      /* parse the first token */
   845      tk := self.lex.next()
   846      tt := tk.tag
   847  
   848      /* it is a directive if it starts with a dot */
   849      if tk.tag == _T_punc && tk.punc() == _P_dot {
   850          return self.cmds()
   851      }
   852  
   853      /* otherwise it could be labels or instructions */
   854      if tt != _T_name {
   855          panic(self.err(tk.pos, "identifier expected"))
   856      }
   857  
   858      /* peek the next token */
   859      lex := self.lex
   860      tkx := lex.next()
   861  
   862      /* check for labels */
   863      if tkx.tag == _T_punc && tkx.punc() == _P_colon {
   864          tkx = lex.next()
   865          ttx := tkx.tag
   866  
   867          /* the line must end here */
   868          if ttx != _T_end {
   869              panic(self.err(tkx.pos, "garbage after label definition"))
   870          }
   871  
   872          /* construct the label */
   873          return &ParsedLine {
   874              Row   : self.lex.row,
   875              Src   : self.lex.src,
   876              Kind  : LineLabel,
   877              Label : ParsedLabel {
   878                  Kind: Declaration,
   879                  Name: tk.str,
   880              },
   881          }
   882      }
   883  
   884      /* special case for the "lock" prefix */
   885      if tk.tag == _T_name && strings.ToLower(tk.str) == "lock" {
   886          lk = true
   887          tk = self.lex.next()
   888  
   889          /* must be an instruction */
   890          if tk.tag != _T_name {
   891              panic(self.err(tk.pos, "identifier expected"))
   892          }
   893      }
   894  
   895      /* set the line kind and mnemonic */
   896      ret := &ParsedLine {
   897          Row         : self.lex.row,
   898          Src         : self.lex.src,
   899          Kind        : LineInstr,
   900          Instruction : ParsedInstruction { Mnemonic: strings.ToLower(tk.str) },
   901      }
   902  
   903      /* check for LOCK prefix */
   904      if lk {
   905          ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, PrefixLock)
   906      }
   907  
   908      /* parse all the operands */
   909      for {
   910          tk = self.lex.next()
   911          tt = tk.tag
   912  
   913          /* check for end of line */
   914          if tt == _T_end {
   915              break
   916          }
   917  
   918          /* expect a comma if not the first operand */
   919          if !ff {
   920              if tt == _T_punc && tk.punc() == _P_comma {
   921                  tk = self.lex.next()
   922              } else {
   923                  panic(self.err(tk.pos, "',' expected"))
   924              }
   925          }
   926  
   927          /* not the first operand anymore */
   928          ff = false
   929          tt = tk.tag
   930  
   931          /* encountered an integer, must be a SIB memory address */
   932          if tt == _T_int {
   933              ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
   934              continue
   935          }
   936  
   937          /* encountered an identifier, maybe an expression or a jump target, or a segment override prefix */
   938          if tt == _T_name {
   939              ts := tk.str
   940              tp := self.lex.pos
   941  
   942              /* if the next token is EOF or a comma, it's a jumpt target */
   943              if tk = self.lex.next(); tk.tag == _T_end || (tk.tag == _T_punc && tk.punc() == _P_comma) {
   944                  self.lex.pos = tp
   945                  ret.Instruction.target(ts)
   946                  continue
   947              }
   948  
   949              /* if it is a colon, it's a segment override prefix, otherwise it must be an RIP-relative addressing operand */
   950              if tk.tag != _T_punc || tk.punc() != _P_colon {
   951                  self.relx(tk)
   952                  ret.Instruction.reference(ts)
   953                  continue
   954              }
   955  
   956              /* lookup segment prefixes */
   957              if p, ok := _SegPrefix[strings.ToLower(ts)]; !ok {
   958                  panic(self.err(tk.pos, "invalid segment name"))
   959              } else {
   960                  ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, p)
   961              }
   962  
   963              /* read the next token */
   964              tk = self.lex.next()
   965              tt = tk.tag
   966  
   967              /* encountered an integer, must be a SIB memory address */
   968              if tt == _T_int {
   969                  ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
   970                  continue
   971              }
   972          }
   973  
   974          /* certain instructions may have a "*" before operands */
   975          if tt == _T_punc && tk.punc() == _P_star {
   976              tk = self.lex.next()
   977              tt = tk.tag
   978              rr = true
   979          }
   980  
   981          /* ... otherwise it must be a punctuation */
   982          if tt != _T_punc {
   983              panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
   984          }
   985  
   986          /* check the operator */
   987          switch tk.punc() {
   988              case _P_lbrk    : break
   989              case _P_minus   : ret.Instruction.mem(self.disp(self.i32(tk, self.negv()))) ; continue
   990              case _P_dollar  : ret.Instruction.imm(self.immx(tk))                        ; continue
   991              case _P_percent : ret.Instruction.reg(self.regv(tk))                        ; continue
   992              default         : panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
   993          }
   994  
   995          /* special case of '(', might be either `(expr)(SIB)` or just `(SIB)`
   996           * read one more token to confirm */
   997          tk = self.lex.next()
   998          tt = tk.tag
   999  
  1000          /* the next token is '%', it's a memory address,
  1001           * or ',' if it's a memory address without base,
  1002           * otherwise it must be in `(expr)(SIB)` form */
  1003          if tk.tag == _T_punc && tk.punc() == _P_percent {
  1004              ret.Instruction.mem(self.base(tk, 0))
  1005          } else if tk.tag == _T_punc && tk.punc() == _P_comma {
  1006              ret.Instruction.mem(self.index(nil, 0))
  1007          } else {
  1008              ret.Instruction.mem(self.disp(self.i32(tk, self.eval(tk.pos))))
  1009          }
  1010      }
  1011  
  1012      /* check "jmp" and "call" instructions */
  1013      if !_RegBranch[ret.Instruction.Mnemonic] {
  1014          return ret
  1015      } else if len(ret.Instruction.Operands) != 1 {
  1016          panic(self.err(tk.pos, fmt.Sprintf(`"%s" requires exact 1 argument`, ret.Instruction.Mnemonic)))
  1017      } else if !rr && ret.Instruction.Operands[0].Op != OpReg && ret.Instruction.Operands[0].Op != OpLabel {
  1018          panic(self.err(tk.pos, fmt.Sprintf(`invalid operand for "%s" instruction`, ret.Instruction.Mnemonic)))
  1019      } else {
  1020          return ret
  1021      }
  1022  }
  1023  
  1024  func (self *Parser) next(p *int) rune {
  1025      for {
  1026          if *p >= len(self.lex.src) {
  1027              return 0
  1028          } else if cc := self.lex.src[*p]; !unicode.IsSpace(cc) {
  1029              return cc
  1030          } else {
  1031              *p++
  1032          }
  1033      }
  1034  }
  1035  
  1036  func (self *Parser) delim(p int) int {
  1037      if cc := self.next(&p); cc == 0 {
  1038          return p
  1039      } else if cc == ',' {
  1040          return p + 1
  1041      } else {
  1042          panic(self.err(p, "',' expected"))
  1043      }
  1044  }
  1045  
  1046  func (self *Parser) strings(argv *[]ParsedCommandArg, p int) int {
  1047      var i int
  1048      var e error
  1049      var v string
  1050  
  1051      /* find the end of string */
  1052      for i = p + 1; i < len(self.lex.src) && self.lex.src[i] != '"'; i++ {
  1053          if self.lex.src[i] == '\\' {
  1054              i++
  1055          }
  1056      }
  1057  
  1058      /* check for EOF */
  1059      if i == len(self.lex.src) {
  1060          panic(self.err(i, "unexpected EOF when scanning strings"))
  1061      }
  1062  
  1063      /* unquote the string */
  1064      if v, e = strconv.Unquote(string(self.lex.src[p:i + 1])); e != nil {
  1065          panic(self.err(p, "invalid string: " + e.Error()))
  1066      }
  1067  
  1068      /* add the argument to buffer */
  1069      *argv = append(*argv, ParsedCommandArg { Value: v, IsString: true })
  1070      return self.delim(i + 1)
  1071  }
  1072  
  1073  func (self *Parser) directives(line string) {
  1074      self.lex.row++
  1075      self.lex.init(line)
  1076  
  1077      /* parse the first token */
  1078      tk := self.lex.next()
  1079      tt := tk.tag
  1080  
  1081      /* check for EOF */
  1082      if tt == _T_end {
  1083          return
  1084      }
  1085  
  1086      /* must be a directive */
  1087      if tt != _T_punc || tk.punc() != _P_hash {
  1088          panic(self.err(tk.pos, "'#' expected"))
  1089      }
  1090  
  1091      /* parse the line number */
  1092      tk = self.lex.next()
  1093      tt = tk.tag
  1094  
  1095      /* must be a line number, if it is, set the row number, and ignore the rest of the line */
  1096      if tt != _T_int {
  1097          panic(self.err(tk.pos, "line number expected"))
  1098      } else {
  1099          self.lex.row = int(tk.u64) - 1
  1100      }
  1101  }
  1102  
// expressions scans a raw expression argument starting at index p, appends its
// source text to argv as a non-string argument, and returns the position computed
// by delim for the character where the scan stopped.
//
// The scan stops at the end of the line, or at a ',' or a closing bracket that is
// neither quoted nor nested inside brackets. It panics with a syntax error if the
// line ends inside a quoted section or with unclosed brackets.
func (self *Parser) expressions(argv *[]ParsedCommandArg, p int) int {
    var i int
    var n int
    var s int

    /* scan until the first standalone ',' or EOF
     *
     *   n is the current bracket nesting depth, only tracked outside of quotes
     *   s is the quoting state: 0 = not quoted, 1 = inside '...', 2 = inside "..."
     *
     * a backslash inside quotes skips the character that follows it */
    loop: for i = p; i < len(self.lex.src); i++ {
        switch self.lex.src[i] {
            case ','           : if s == 0 { if n == 0 { break loop } }
            case ']', '}', '>' : if s == 0 { if n == 0 { break loop } else { n-- } }
            case '[', '{', '<' : if s == 0 { n++ }
            case '\\'          : if s != 0 { i++ }
            case '\''          : if s != 2 { s ^= 1 }
            case '"'           : if s != 1 { s ^= 2 }
        }
    }

    /* check for EOF in strings */
    if s != 0 {
        panic(self.err(i, "unexpected EOF when scanning strings"))
    }

    /* check for bracket matching */
    if n != 0 {
        panic(self.err(i, "unbalanced '{' or '[' or '<'"))
    }

    /* add the argument to buffer, the text is kept verbatim and not evaluated here */
    *argv = append(*argv, ParsedCommandArg { Value: string(self.lex.src[p:i]) })
    return self.delim(i)
}
  1134  
  1135  // Feed feeds the parser with one more line, and the parser
  1136  // parses it into a ParsedLine.
  1137  //
  1138  // NOTE: Feed does not handle empty lines or multiple lines,
  1139  //       it panics when this happens. Use Parse to parse multiple
  1140  //       lines of assembly source.
  1141  //
  1142  func (self *Parser) Feed(src string) (ret *ParsedLine, err error) {
  1143      var ok bool
  1144      var ss string
  1145      var vv interface{}
  1146  
  1147      /* check for multiple lines */
  1148      if strings.ContainsRune(src, '\n') {
  1149          return nil, errors.New("passing multiple lines to Feed()")
  1150      }
  1151  
  1152      /* check for blank lines */
  1153      if ss = strings.TrimSpace(src); ss == "" || ss[0] == '#' || strings.HasPrefix(ss, "//") {
  1154          return nil, errors.New("blank line or line with only comments or line-marks")
  1155      }
  1156  
  1157      /* setup error handler */
  1158      defer func() {
  1159          if vv = recover(); vv != nil {
  1160              if err, ok = vv.(*SyntaxError); !ok {
  1161                  panic(vv)
  1162              }
  1163          }
  1164      }()
  1165  
  1166      /* call the actual parser */
  1167      ret = self.feed(src)
  1168      return
  1169  }
  1170  
  1171  // Parse parses the entire assembly source (possibly multiple lines) into
  1172  // a sequence of *ParsedLine.
  1173  func (self *Parser) Parse(src string) (ret []*ParsedLine, err error) {
  1174      var ok bool
  1175      var ss string
  1176      var vv interface{}
  1177  
  1178      /* setup error handler */
  1179      defer func() {
  1180          if vv = recover(); vv != nil {
  1181              if err, ok = vv.(*SyntaxError); !ok {
  1182                  panic(vv)
  1183              }
  1184          }
  1185      }()
  1186  
  1187      /* feed every line */
  1188      for _, line := range strings.Split(src, "\n") {
  1189          if ss = strings.TrimSpace(line); ss == "" || strings.HasPrefix(ss, "//") {
  1190              self.lex.row++
  1191          } else if ss[0] == '#' {
  1192              self.directives(line)
  1193          } else {
  1194              ret = append(ret, self.feed(line))
  1195          }
  1196      }
  1197  
  1198      /* all done */
  1199      err = nil
  1200      return
  1201  }
  1202  
  1203  // Directive handles the directive.
  1204  func (self *Parser) Directive(line string) (err error) {
  1205      var ok bool
  1206      var ss string
  1207      var vv interface{}
  1208  
  1209      /* check for directives */
  1210      if ss = strings.TrimSpace(line); ss == "" || ss[0] != '#' {
  1211          return errors.New("not a directive")
  1212      }
  1213  
  1214      /* setup error handler */
  1215      defer func() {
  1216          if vv = recover(); vv != nil {
  1217              if err, ok = vv.(*SyntaxError); !ok {
  1218                  panic(vv)
  1219              }
  1220          }
  1221      }()
  1222  
  1223      /* call the directive parser */
  1224      self.directives(line)
  1225      return
  1226  }
  1227  
// _TermRepo is a name-to-term registry which holds both labels and the terms
// registered with define. The map is created lazily on the first insertion.
type _TermRepo struct {
    terms map[string]expr.Term
}
  1231  
  1232  func (self *_TermRepo) Get(name string) (expr.Term, error) {
  1233      if ret, ok := self.terms[name]; ok {
  1234          return ret, nil
  1235      } else {
  1236          return nil, errors.New("undefined name: " + name)
  1237      }
  1238  }
  1239  
  1240  func (self *_TermRepo) label(name string) (*Label, error) {
  1241      var ok bool
  1242      var lb *Label
  1243      var tr expr.Term
  1244  
  1245      /* check for existing terms */
  1246      if tr, ok = self.terms[name]; ok {
  1247          if lb, ok = tr.(*Label); ok {
  1248              return lb, nil
  1249          } else {
  1250              return nil, errors.New("name is not a label: " + name)
  1251          }
  1252      }
  1253  
  1254      /* create a new one as needed */
  1255      lb = new(Label)
  1256      lb.Name = name
  1257  
  1258      /* create the map if needed */
  1259      if self.terms == nil {
  1260          self.terms = make(map[string]expr.Term, 1)
  1261      }
  1262  
  1263      /* register the label */
  1264      self.terms[name] = lb
  1265      return lb, nil
  1266  }
  1267  
  1268  func (self *_TermRepo) define(name string, term expr.Term) {
  1269      var ok bool
  1270      var tr expr.Term
  1271  
  1272      /* create the map if needed */
  1273      if self.terms == nil {
  1274          self.terms = make(map[string]expr.Term, 1)
  1275      }
  1276  
  1277      /* check for existing terms */
  1278      if tr, ok = self.terms[name]; !ok {
  1279          self.terms[name] = term
  1280      } else if _, ok = tr.(*Label); !ok {
  1281          self.terms[name] = term
  1282      } else {
  1283          panic("conflicting term types: " + name)
  1284      }
  1285  }
  1286  
// _Command describes an assembler command.
//
// The _Command.args describes both the arity and the argument types with
// characters: every character other than '?' stands for one argument, and the
// character itself represents the argument type.
//
// Possible values are:
//
//      s   This argument should be a string
//      e   This argument should be an expression
//      ?   The next argument is optional, and must be the last argument.
//
type _Command struct {
    // args is the argument descriptor explained above.
    args    string

    // handler is invoked with the parsed arguments once they have been checked.
    handler func(*Assembler, *Program, []ParsedCommandArg) error
}
  1303  
// Options controls the behavior of Assembler.
type Options struct {
    // InstructionAliasing specifies whether to enable instruction aliasing.
    // Setting it to true enables instruction aliasing, and the Assembler will try harder to find instructions.
    InstructionAliasing bool

    // IgnoreUnknownDirectives specifies whether to ignore unknown directives instead of reporting errors.
    // Setting it to true silently ignores all unknown directives, which is useful for parsing generated assembly.
    IgnoreUnknownDirectives bool
}
  1314  
// Assembler assembles the entire assembly program and generates the corresponding
// machine code representations.
type Assembler struct {
    cc   int            // number of lines processed so far, used to enforce that ".org" comes first
    ps   Parser         // parser that turns the source into parsed lines
    pc   uintptr        // origin of the program, set by WithBase or the "org" command
    buf  []byte         // machine code generated by the last successful Assemble
    main string         // name of the entry point set by the "entry" command, empty if not set
    opts Options        // assembler options, exposed by Options()
    repo _TermRepo      // labels and terms defined by the program
    expr expr.Parser    // expression parser used by the commands
    line *ParsedLine    // line being assembled, used for error reporting
}
  1328  
// asmCommands maps every supported command name to its argument descriptor
// (see _Command) and its handler. Note that "fill" and "space" share the
// same handler.
var asmCommands = map[string]_Command {
    "org"     : { "e"   , (*Assembler).assembleCommandOrg     },
    "set"     : { "ee"  , (*Assembler).assembleCommandSet     },
    "byte"    : { "e"   , (*Assembler).assembleCommandByte    },
    "word"    : { "e"   , (*Assembler).assembleCommandWord    },
    "long"    : { "e"   , (*Assembler).assembleCommandLong    },
    "quad"    : { "e"   , (*Assembler).assembleCommandQuad    },
    "fill"    : { "e?e" , (*Assembler).assembleCommandFill    },
    "space"   : { "e?e" , (*Assembler).assembleCommandFill    },
    "align"   : { "e?e" , (*Assembler).assembleCommandAlign   },
    "entry"   : { "e"   , (*Assembler).assembleCommandEntry   },
    "ascii"   : { "s"   , (*Assembler).assembleCommandAscii   },
    "asciz"   : { "s"   , (*Assembler).assembleCommandAsciz   },
    "p2align" : { "e?e" , (*Assembler).assembleCommandP2Align },
}
  1344  
  1345  func (self *Assembler) err(msg string) *SyntaxError {
  1346      return &SyntaxError {
  1347          Pos    : -1,
  1348          Row    : self.line.Row,
  1349          Src    : self.line.Src,
  1350          Reason : msg,
  1351      }
  1352  }
  1353  
  1354  func (self *Assembler) eval(expr string) (int64, error) {
  1355      if exp, err := self.expr.SetSource(expr).Parse(nil); err != nil {
  1356          return 0, err
  1357      } else {
  1358          return exp.Evaluate()
  1359      }
  1360  }
  1361  
  1362  func (self *Assembler) checkArgs(i int, n int, v *ParsedCommand, isString bool) error {
  1363      if i >= len(v.Args) {
  1364          return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(v.Cmd), n))
  1365      } else if isString && !v.Args[i].IsString {
  1366          return self.err(fmt.Sprintf("argument %d of command %s must be a string", i + 1, strconv.Quote(v.Cmd)))
  1367      } else if !isString && v.Args[i].IsString {
  1368          return self.err(fmt.Sprintf("argument %d of command %s must be an expression", i + 1, strconv.Quote(v.Cmd)))
  1369      } else {
  1370          return nil
  1371      }
  1372  }
  1373  
  1374  func (self *Assembler) assembleLabel(p *Program, lb *ParsedLabel) error {
  1375      if v, err := self.repo.label(lb.Name); err != nil {
  1376          return err
  1377      } else {
  1378          p.Link(v)
  1379          return nil
  1380      }
  1381  }
  1382  
  1383  func (self *Assembler) assembleInstr(p *Program, line *ParsedInstruction) (err error) {
  1384      var ok  bool
  1385      var pfx []byte
  1386      var ops []interface{}
  1387      var enc _InstructionEncoder
  1388  
  1389      /* convert to lower-case */
  1390      opts := self.opts
  1391      name := strings.ToLower(line.Mnemonic)
  1392  
  1393      /* fix register-addressing branches if needed */
  1394      if opts.InstructionAliasing && len(line.Operands) == 1 {
  1395          switch {
  1396              case name == "retq"                                    : name = "ret"
  1397              case name == "movabsq"                                 : name = "movq"
  1398              case name == "jmp"   && line.Operands[0].Op != OpLabel : name = "jmpq"
  1399              case name == "jmpq"  && line.Operands[0].Op == OpLabel : name = "jmp"
  1400              case name == "call"  && line.Operands[0].Op != OpLabel : name = "callq"
  1401              case name == "callq" && line.Operands[0].Op == OpLabel : name = "call"
  1402          }
  1403      }
  1404  
  1405      /* lookup from the alias table if needed */
  1406      if opts.InstructionAliasing {
  1407          enc, ok = _InstructionAliases[name]
  1408      }
  1409  
  1410      /* lookup from the instruction table */
  1411      if !ok {
  1412          enc, ok = Instructions[name]
  1413      }
  1414  
  1415      /* remove size suffix if possible */
  1416      if !ok && opts.InstructionAliasing {
  1417          switch i := len(name) - 1; name[i] {
  1418              case 'b', 'w', 'l', 'q': {
  1419                  enc, ok = Instructions[name[:i]]
  1420              }
  1421          }
  1422      }
  1423  
  1424      /* check for instruction name */
  1425      if !ok {
  1426          return self.err("no such instruction: " + strconv.Quote(name))
  1427      }
  1428  
  1429      /* allocate memory for prefix if any */
  1430      if len(line.Prefixes) != 0 {
  1431          pfx = make([]byte, len(line.Prefixes))
  1432      }
  1433  
  1434      /* convert the prefixes */
  1435      for i, v := range line.Prefixes {
  1436          switch v {
  1437              case PrefixLock      : pfx[i] = _P_lock
  1438              case PrefixSegmentCS : pfx[i] = _P_cs
  1439              case PrefixSegmentDS : pfx[i] = _P_ds
  1440              case PrefixSegmentES : pfx[i] = _P_es
  1441              case PrefixSegmentFS : pfx[i] = _P_fs
  1442              case PrefixSegmentGS : pfx[i] = _P_gs
  1443              case PrefixSegmentSS : pfx[i] = _P_ss
  1444              default              : panic("unreachable: invalid segment prefix")
  1445          }
  1446      }
  1447  
  1448      /* convert the operands */
  1449      for _, op := range line.Operands {
  1450          switch op.Op {
  1451              case OpImm   : ops = append(ops, op.Imm)
  1452              case OpReg   : ops = append(ops, op.Reg)
  1453              case OpMem   : self.assembleInstrMem(&ops, op.Memory)  
  1454              case OpLabel : self.assembleInstrLabel(&ops, op.Label) 
  1455              default      : panic("parser yields an invalid operand kind")
  1456          }
  1457      }
  1458  
  1459      /* catch any exceptions in the encoder */
  1460      defer func() {
  1461          if v := recover(); v != nil {
  1462              err = self.err(fmt.Sprint(v))
  1463          }
  1464      }()
  1465  
  1466      /* encode the instruction */
  1467      enc(p, ops...).prefix = pfx
  1468      return nil
  1469  }
  1470  
  1471  func (self *Assembler) assembleInstrMem(ops *[]interface{}, addr MemoryAddress) {
  1472      mem := new(MemoryOperand)
  1473      *ops = append(*ops, mem)
  1474  
  1475      /* check for RIP-relative addressing */
  1476      if addr.Base != rip {
  1477          mem.Addr.Type = Memory
  1478          mem.Addr.Memory = addr
  1479      } else {
  1480          mem.Addr.Type = Offset
  1481          mem.Addr.Offset = RelativeOffset(addr.Displacement)
  1482      }
  1483  }
  1484  
  1485  func (self *Assembler) assembleInstrLabel(ops *[]interface{}, label ParsedLabel) {
  1486      vk := label.Kind
  1487      tr, err := self.repo.label(label.Name)
  1488  
  1489      /* check for errors */
  1490      if err != nil {
  1491          panic(err)
  1492      }
  1493  
  1494      /* check for branch target */
  1495      if vk == BranchTarget {
  1496          *ops = append(*ops, tr)
  1497          return
  1498      }
  1499  
  1500      /* add to ops */
  1501      *ops = append(*ops, &MemoryOperand {
  1502          Addr: Addressable {
  1503              Type      : Reference,
  1504              Reference : tr,
  1505          },
  1506      })
  1507  }
  1508  
// assembleCommand looks up the handler of the command in asmCommands, checks the
// arguments against the argument descriptor of the command (see _Command), and
// invokes the handler if they match.
//
// Unknown commands are either ignored or reported as errors, depending on the
// IgnoreUnknownDirectives option. It panics if the argument descriptor itself is
// malformed.
func (self *Assembler) assembleCommand(p *Program, line *ParsedCommand) error {
    var iv int
    var cc rune
    var ok bool
    var va bool
    var fn _Command

    /* find the command */
    if fn, ok = asmCommands[line.Cmd]; !ok {
        if self.opts.IgnoreUnknownDirectives {
            return nil
        } else {
            return self.err("no such command: " + strconv.Quote(line.Cmd))
        }
    }

    /* expected & real argument count, note that the expected count is the length
     * of the descriptor, which also counts the '?' marker of variadic commands
     *
     * NOTE(review): because of this, a missing mandatory argument of a variadic
     * command is reported with a count that includes the '?' marker */
    argx := len(fn.args)
    argc := len(line.Args)

    /* check the arguments, stop at the '?' marker, iv is the index of the marker
     * in this case, which is also the index of the optional argument */
    loop: for iv, cc = range fn.args {
        switch cc {
            case '?' : va = true; break loop
            case 's' : if err := self.checkArgs(iv, argx, line, true)  ; err != nil { return err }
            case 'e' : if err := self.checkArgs(iv, argx, line, false) ; err != nil { return err }
            default  : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
        }
    }

    /* simple case: non-variadic command */
    if !va {
        if argc == argx {
            return fn.handler(self, p, line.Args)
        } else {
            return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(line.Cmd), argx))
        }
    }

    /* check for the descriptor, the '?' must be the second last character */
    if iv != argx - 2 {
        panic("invalid argument descriptor: " + strconv.Quote(fn.args))
    }

    /* variadic command and the final optional argument is set, the argument
     * is known to exist here, so the count passed to checkArgs is not used */
    if argc == argx - 1 {
        switch fn.args[argx - 1] {
            case 's' : if err := self.checkArgs(iv, -1, line, true)  ; err != nil { return err }
            case 'e' : if err := self.checkArgs(iv, -1, line, false) ; err != nil { return err }
            default  : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
        }
    }

    /* check argument count, either with or without the optional argument */
    if argc == argx - 1 || argc == argx - 2 {
        return fn.handler(self, p, line.Args)
    } else {
        return self.err(fmt.Sprintf("command %s takes %d or %d arguments", strconv.Quote(line.Cmd), argx - 2, argx - 1))
    }
}
  1569  
  1570  func (self *Assembler) assembleCommandInt(p *Program, argv []ParsedCommandArg, addfn func(*Program, *expr.Expr) *Instruction) error {
  1571      var err error
  1572      var val *expr.Expr
  1573  
  1574      /* parse the expression */
  1575      if val, err = self.expr.SetSource(argv[0].Value).Parse(&self.repo); err != nil {
  1576          return err
  1577      }
  1578  
  1579      /* add to the program */
  1580      addfn(p, val)
  1581      return nil
  1582  }
  1583  
  1584  func (self *Assembler) assembleCommandOrg(_ *Program, argv []ParsedCommandArg) error {
  1585      var err error
  1586      var val int64
  1587  
  1588      /* evaluate the expression */
  1589      if val, err = self.eval(argv[0].Value); err != nil {
  1590          return err
  1591      }
  1592  
  1593      /* check for origin */
  1594      if val < 0 {
  1595          return self.err(fmt.Sprintf("negative origin: %d", val))
  1596      }
  1597  
  1598      /* ".org" must be the first command if any */
  1599      if self.cc != 1 {
  1600          return self.err(".org must be the first command if present")
  1601      }
  1602  
  1603      /* set the initial program counter */
  1604      self.pc = uintptr(val)
  1605      return nil
  1606  }
  1607  
  1608  func (self *Assembler) assembleCommandSet(_ *Program, argv []ParsedCommandArg) error {
  1609      var err error
  1610      var val *expr.Expr
  1611  
  1612      /* parse the expression */
  1613      if val, err = self.expr.SetSource(argv[1].Value).Parse(&self.repo); err != nil {
  1614          return err
  1615      }
  1616  
  1617      /* define the new identifier */
  1618      self.repo.define(argv[0].Value, val)
  1619      return nil
  1620  }
  1621  
// assembleCommandByte handles the "byte" command by passing the parsed
// expression to (*Program).Byte.
func (self *Assembler) assembleCommandByte(p *Program, argv []ParsedCommandArg) error {
    return self.assembleCommandInt(p, argv, (*Program).Byte)
}
  1625  
// assembleCommandWord handles the "word" command by passing the parsed
// expression to (*Program).Word.
func (self *Assembler) assembleCommandWord(p *Program, argv []ParsedCommandArg) error {
    return self.assembleCommandInt(p, argv, (*Program).Word)
}
  1629  
// assembleCommandLong handles the "long" command by passing the parsed
// expression to (*Program).Long.
func (self *Assembler) assembleCommandLong(p *Program, argv []ParsedCommandArg) error {
    return self.assembleCommandInt(p, argv, (*Program).Long)
}
  1633  
// assembleCommandQuad handles the "quad" command by passing the parsed
// expression to (*Program).Quad.
func (self *Assembler) assembleCommandQuad(p *Program, argv []ParsedCommandArg) error {
    return self.assembleCommandInt(p, argv, (*Program).Quad)
}
  1637  
  1638  func (self *Assembler) assembleCommandFill(p *Program, argv []ParsedCommandArg) error {
  1639      var fv byte
  1640      var nb int64
  1641      var ex error
  1642  
  1643      /* evaluate the size */
  1644      if nb, ex = self.eval(argv[0].Value); ex != nil {
  1645          return ex
  1646      }
  1647  
  1648      /* check for filling size */
  1649      if nb < 0 {
  1650          return self.err(fmt.Sprintf("negative filling size: %d", nb))
  1651      }
  1652  
  1653      /* check for optional filling value */
  1654      if len(argv) == 2 {
  1655          if val, err := self.eval(argv[1].Value); err != nil {
  1656              return err
  1657          } else if val < math.MinInt8 || val > math.MaxUint8 {
  1658              return self.err(fmt.Sprintf("value %d cannot be represented with a byte", val))
  1659          } else {
  1660              fv = byte(val)
  1661          }
  1662      }
  1663  
  1664      /* fill with specified byte */
  1665      p.Data(bytes.Repeat([]byte { fv }, int(nb)))
  1666      return nil
  1667  }
  1668  
  1669  func (self *Assembler) assembleCommandAlign(p *Program, argv []ParsedCommandArg) error {
  1670      var nb int64
  1671      var ex error
  1672      var fv *expr.Expr
  1673  
  1674      /* evaluate the size */
  1675      if nb, ex = self.eval(argv[0].Value); ex != nil {
  1676          return ex
  1677      }
  1678  
  1679      /* check for alignment value */
  1680      if nb <= 0 {
  1681          return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
  1682      }
  1683  
  1684      /* alignment must be a power of 2 */
  1685      if (nb & (nb - 1)) != 0 {
  1686          return self.err(fmt.Sprintf("alignment must be a power of 2: %d", nb))
  1687      }
  1688  
  1689      /* check for optional filling value */
  1690      if len(argv) == 2 {
  1691          if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
  1692              fv = v
  1693          } else {
  1694              return err
  1695          }
  1696      }
  1697  
  1698      /* fill with specified byte, default to 0 if not specified */
  1699      p.Align(uint64(nb), fv)
  1700      return nil
  1701  }
  1702  
  1703  func (self *Assembler) assembleCommandEntry(_ *Program, argv []ParsedCommandArg) error {
  1704      name := argv[0].Value
  1705      rbuf := []rune(name)
  1706  
  1707      /* check all the characters */
  1708      for i, cc := range rbuf {
  1709          if !isident0(cc) && (i == 0 || !isident(cc)) {
  1710              return self.err("entry point must be a label name")
  1711          }
  1712      }
  1713  
  1714      /* set the main entry point */
  1715      self.main = name
  1716      return nil
  1717  }
  1718  
// assembleCommandAscii handles the "ascii" command: it emits the bytes of the
// string argument as data, without a terminator.
func (self *Assembler) assembleCommandAscii(p *Program, argv []ParsedCommandArg) error {
    p.Data([]byte(argv[0].Value))
    return nil
}
  1723  
// assembleCommandAsciz handles the "asciz" command: it emits the bytes of the
// string argument as data, followed by a single zero byte.
func (self *Assembler) assembleCommandAsciz(p *Program, argv []ParsedCommandArg) error {
    p.Data(append([]byte(argv[0].Value), 0))
    return nil
}
  1728  
  1729  func (self *Assembler) assembleCommandP2Align(p *Program, argv []ParsedCommandArg) error {
  1730      var nb int64
  1731      var ex error
  1732      var fv *expr.Expr
  1733  
  1734      /* evaluate the size */
  1735      if nb, ex = self.eval(argv[0].Value); ex != nil {
  1736          return ex
  1737      }
  1738  
  1739      /* check for alignment value */
  1740      if nb <= 0 {
  1741          return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
  1742      }
  1743  
  1744      /* check for optional filling value */
  1745      if len(argv) == 2 {
  1746          if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
  1747              fv = v
  1748          } else {
  1749              return err
  1750          }
  1751      }
  1752  
  1753      /* fill with specified byte, default to 0 if not specified */
  1754      p.Align(1 << nb, fv)
  1755      return nil
  1756  }
  1757  
// Base returns the origin of the program, which is set by either WithBase or
// the "org" command.
func (self *Assembler) Base() uintptr {
    return self.pc
}
  1762  
// Code returns the assembled machine code saved by the last successful call
// to Assemble. The internal buffer is returned as is, not a copy of it.
func (self *Assembler) Code() []byte {
    return self.buf
}
  1767  
  1768  // Entry returns the address of the specified entry point, or the origin if not specified.
  1769  func (self *Assembler) Entry() uintptr {
  1770      if self.main == "" {
  1771          return self.pc
  1772      } else if tr, err := self.repo.Get(self.main); err != nil {
  1773          panic(err)
  1774      } else if val, err := tr.Evaluate(); err != nil {
  1775          panic(err)
  1776      } else {
  1777          return uintptr(val)
  1778      }
  1779  }
  1780  
// Options returns a pointer to the internal options, changing the options
// through it WILL affect this Assembler instance.
func (self *Assembler) Options() *Options {
    return &self.opts
}
  1785  
// WithBase resets the origin to pc, and returns the Assembler itself to allow
// chaining of calls.
func (self *Assembler) WithBase(pc uintptr) *Assembler {
    self.pc = pc
    return self
}
  1791  
  1792  // Assemble assembles the assembly source and save the machine code to internal buffer.
  1793  func (self *Assembler) Assemble(src string) error {
  1794      var err error
  1795      var buf []*ParsedLine
  1796  
  1797      /* parse the source */
  1798      if buf, err = self.ps.Parse(src); err != nil {
  1799          return err
  1800      }
  1801  
  1802      /* create a new program */
  1803      p := DefaultArch.CreateProgram()
  1804      defer p.Free()
  1805  
  1806      /* process every line */
  1807      for _, self.line = range buf {
  1808          switch self.cc++; self.line.Kind {
  1809              case LineLabel   : if err = self.assembleLabel   (p, &self.line.Label)       ; err != nil { return err }
  1810              case LineInstr   : if err = self.assembleInstr   (p, &self.line.Instruction) ; err != nil { return err }
  1811              case LineCommand : if err = self.assembleCommand (p, &self.line.Command)     ; err != nil { return err }
  1812              default          : panic("parser yields an invalid line kind")
  1813          }
  1814      }
  1815  
  1816      /* assemble the program */
  1817      self.buf = p.Assemble(self.pc)
  1818      return nil
  1819  }