github.com/cloudwego/iasm@v0.2.0/expr/parser.go (about)

     1  //
     2  // Copyright 2024 CloudWeGo Authors
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  //
    16  
    17  package expr
    18  
    19  import (
    20      `strconv`
    21      `unicode`
    22      `unsafe`
    23  )
    24  
// _TokenKind identifies the category of a token produced by the lexer.
type _TokenKind uint8

// Token kinds. The numbering starts at 1, so the zero value of _TokenKind
// (as found in an empty _Token{}) does not match any of them.
const (
    // _T_end marks the end of the input.
    _T_end _TokenKind = iota + 1
    // _T_int is an integer literal; the value is stored in _Token.u64.
    _T_int
    // _T_punc is a punctuation or operator token; its code is stored in _Token.u64.
    _T_punc
    // _T_name is an identifier; its runes are referenced by _Token.ptr and _Token.u64.
    _T_name
)
    33  
// Codes of the operators that are written by repeating one character.
// The lexer produces them by OR'ing _OP2 onto the repeated character, which
// keeps e.g. "**" distinct from "*" while both still fit in one token code.
const (
    // _OP2 is the flag bit that marks a doubled-character operator.
    _OP2 = 0x80
    // _POW is the code of "**".
    _POW = _OP2 | '*'
    // _SHL is the code of "<<".
    _SHL = _OP2 | '<'
    // _SHR is the code of ">>".
    _SHR = _OP2 | '>'
)
    40  
// _Slice is overlaid on the memory of a slice value so that its parts can be
// written one by one; _Token.rbuf uses it to build a []rune without copying.
// NOTE(review): this relies on the runtime laying a slice out as data pointer,
// length, capacity in exactly this order — confirm for the toolchain in use.
type _Slice struct {
    // p is the pointer to the first element.
    p unsafe.Pointer
    // n is the slice length.
    n int
    // c is the slice capacity.
    c int
}
    46  
// _Token is a single lexical token of an expression.
type _Token struct {
    // pos is the index into the parser's rune buffer at which the token starts.
    pos int
    // ptr points at the first rune of the identifier; only set for _T_name.
    ptr *rune
    // u64 depends on the kind: the literal value for _T_int, the operator
    // code for _T_punc, and the number of runes in the identifier for _T_name.
    u64 uint64
    // tag is the kind of the token.
    tag _TokenKind
}
    53  
    54  func (self _Token) str() (v string) {
    55      return string(self.rbuf())
    56  }
    57  
    58  func (self _Token) rbuf() (v []rune) {
    59      (*_Slice)(unsafe.Pointer(&v)).c = int(self.u64)
    60      (*_Slice)(unsafe.Pointer(&v)).n = int(self.u64)
    61      (*_Slice)(unsafe.Pointer(&v)).p = unsafe.Pointer(self.ptr)
    62      return
    63  }
    64  
    65  func tokenEnd(p int) _Token {
    66      return _Token {
    67          pos: p,
    68          tag: _T_end,
    69      }
    70  }
    71  
    72  func tokenInt(p int, v uint64) _Token {
    73      return _Token {
    74          pos: p,
    75          u64: v,
    76          tag: _T_int,
    77      }
    78  }
    79  
    80  func tokenPunc(p int, v rune) _Token {
    81      return _Token {
    82          pos: p,
    83          tag: _T_punc,
    84          u64: uint64(v),
    85      }
    86  }
    87  
    88  func tokenName(p int, v []rune) _Token {
    89      return _Token {
    90          pos: p,
    91          ptr: &v[0],
    92          tag: _T_name,
    93          u64: uint64(len(v)),
    94      }
    95  }
    96  
// Repository represents a repository of Terms. The parser consults it to
// resolve every identifier that appears in an expression.
type Repository interface {
    // Get looks up the Term registered under name. A non-nil error aborts
    // parsing and is handed back to the caller of Parse as it is.
    Get(name string) (Term, error)
}
   101  
// Parser parses an expression string to its AST representation.
// Load the input with SetSource, then call Parse.
type Parser struct {
    // pos is the index in src of the next rune to be read.
    pos int
    // src is the expression source, decoded into runes.
    src []rune
}
   107  
// binaryOps maps the code of a binary operator (the operator character itself,
// or an _OP2-flagged code for the doubled ones) to the Expr method that
// combines the left and right operands. It is an array indexed by that code;
// every index not listed below holds a nil function.
var binaryOps = [...]func(*Expr, *Expr) *Expr {
    '+'  : (*Expr).Add,
    '-'  : (*Expr).Sub,
    '*'  : (*Expr).Mul,
    '/'  : (*Expr).Div,
    '%'  : (*Expr).Mod,
    '&'  : (*Expr).And,
    '^'  : (*Expr).Xor,
    '|'  : (*Expr).Or,
    _SHL : (*Expr).Shl,
    _SHR : (*Expr).Shr,
    _POW : (*Expr).Pow,
}
   121  
// precedence groups the binary operators by binding strength. The entry at
// index 0 binds the loosest and the last entry binds the tightest, because the
// operands of level N are parsed at level N + 1. Operators that share an entry
// have equal precedence and are combined from left to right.
var precedence = [...]map[int]bool {
    {_SHL: true, _SHR: true},
    {'|' : true},
    {'^' : true},
    {'&' : true},
    {'+' : true, '-': true},
    {'*' : true, '/': true, '%': true},
    {_POW: true},
}
   131  
   132  func (self *Parser) ch() rune {
   133      return self.src[self.pos]
   134  }
   135  
   136  func (self *Parser) eof() bool {
   137      return self.pos >= len(self.src)
   138  }
   139  
   140  func (self *Parser) rch() (v rune) {
   141      v, self.pos = self.src[self.pos], self.pos + 1
   142      return
   143  }
   144  
   145  func (self *Parser) hex(ss []rune) bool {
   146      if len(ss) == 1 && ss[0] == '0' {
   147          return unicode.ToLower(self.ch()) == 'x'
   148      } else if len(ss) <= 1 || unicode.ToLower(ss[1]) != 'x' {
   149          return unicode.IsDigit(self.ch())
   150      } else {
   151          return ishexdigit(self.ch())
   152      }
   153  }
   154  
   155  func (self *Parser) int(p int, ss []rune) (_Token, error) {
   156      var err error
   157      var val uint64
   158  
   159      /* find all the digits */
   160      for !self.eof() && self.hex(ss) {
   161          ss = append(ss, self.rch())
   162      }
   163  
   164      /* parse the value */
   165      if val, err = strconv.ParseUint(string(ss), 0, 64); err != nil {
   166          return _Token{}, err
   167      } else {
   168          return tokenInt(p, val), nil
   169      }
   170  }
   171  
   172  func (self *Parser) name(p int, ss []rune) _Token {
   173      for !self.eof() && isident(self.ch()) { ss = append(ss, self.rch()) }
   174      return tokenName(p, ss)
   175  }
   176  
   177  func (self *Parser) read(p int, ch rune) (_Token, error) {
   178      if isdigit(ch) {
   179          return self.int(p, []rune { ch })
   180      } else if isident0(ch) {
   181          return self.name(p, []rune { ch }), nil
   182      } else if isop2ch(ch) && !self.eof() && self.ch() == ch {
   183          return tokenPunc(p, _OP2 | self.rch()), nil
   184      } else if isop1ch(ch) {
   185          return tokenPunc(p, ch), nil
   186      } else {
   187          return _Token{}, newSyntaxError(self.pos, "invalid character " + strconv.QuoteRuneToASCII(ch))
   188      }
   189  }
   190  
   191  func (self *Parser) next() (_Token, error) {
   192      for {
   193          var p int
   194          var c rune
   195  
   196          /* check for EOF */
   197          if self.eof() {
   198              return tokenEnd(self.pos), nil
   199          }
   200  
   201          /* read the next char */
   202          p = self.pos
   203          c = self.rch()
   204  
   205          /* parse the token if not a space */
   206          if !unicode.IsSpace(c) {
   207              return self.read(p, c)
   208          }
   209      }
   210  }
   211  
   212  func (self *Parser) grab(tk _Token, repo Repository) (*Expr, error) {
   213      if repo == nil {
   214          return nil, newSyntaxError(tk.pos, "unresolved symbol: " + tk.str())
   215      } else if term, err := repo.Get(tk.str()); err != nil {
   216          return nil, err
   217      } else {
   218          return Ref(term), nil
   219      }
   220  }
   221  
   222  func (self *Parser) nest(nest int, repo Repository) (*Expr, error) {
   223      var err error
   224      var ret *Expr
   225      var ntk _Token
   226  
   227      /* evaluate the nested expression */
   228      if ret, err = self.expr(0, nest + 1, repo); err != nil {
   229          return nil, err
   230      }
   231  
   232      /* must follows with a ')' */
   233      if ntk, err = self.next(); err != nil {
   234          return nil, err
   235      } else if ntk.tag != _T_punc || ntk.u64 != ')' {
   236          return nil, newSyntaxError(ntk.pos, "')' expected")
   237      } else {
   238          return ret, nil
   239      }
   240  }
   241  
   242  func (self *Parser) unit(nest int, repo Repository) (*Expr, error) {
   243      if tk, err := self.next(); err != nil {
   244          return nil, err
   245      } else if tk.tag == _T_int {
   246          return Int(int64(tk.u64)), nil
   247      } else if tk.tag == _T_name {
   248          return self.grab(tk, repo)
   249      } else if tk.tag == _T_punc && tk.u64 == '(' {
   250          return self.nest(nest, repo)
   251      } else if tk.tag == _T_punc && tk.u64 == '+' {
   252          return self.unit(nest, repo)
   253      } else if tk.tag == _T_punc && tk.u64 == '-' {
   254          return neg2(self.unit(nest, repo))
   255      } else if tk.tag == _T_punc && tk.u64 == '~' {
   256          return not2(self.unit(nest, repo))
   257      } else {
   258          return nil, newSyntaxError(tk.pos, "integer, unary operator or nested expression expected")
   259      }
   260  }
   261  
   262  func (self *Parser) term(prec int, nest int, repo Repository) (*Expr, error) {
   263      var err error
   264      var val *Expr
   265  
   266      /* parse the LHS operand */
   267      if val, err = self.expr(prec + 1, nest, repo); err != nil {
   268          return nil, err
   269      }
   270  
   271      /* parse all the operators of the same precedence */
   272      for {
   273          var op int
   274          var rv *Expr
   275          var tk _Token
   276  
   277          /* peek the next token */
   278          pp := self.pos
   279          tk, err = self.next()
   280  
   281          /* check for errors */
   282          if err != nil {
   283              return nil, err
   284          }
   285  
   286          /* encountered EOF */
   287          if tk.tag == _T_end {
   288              return val, nil
   289          }
   290  
   291          /* must be an operator */
   292          if tk.tag != _T_punc {
   293              return nil, newSyntaxError(tk.pos, "operators expected")
   294          }
   295  
   296          /* check for the operator precedence */
   297          if op = int(tk.u64); !precedence[prec][op] {
   298              self.pos = pp
   299              return val, nil
   300          }
   301  
   302          /* evaluate the RHS operand, and combine the value */
   303          if rv, err = self.expr(prec + 1, nest, repo); err != nil {
   304              return nil, err
   305          } else {
   306              val = binaryOps[op](val, rv)
   307          }
   308      }
   309  }
   310  
   311  func (self *Parser) expr(prec int, nest int, repo Repository) (*Expr, error) {
   312      if prec >= len(precedence) {
   313          return self.unit(nest, repo)
   314      } else {
   315          return self.term(prec, nest, repo)
   316      }
   317  }
   318  
   319  // Parse parses the expression, and returns it's AST tree.
   320  func (self *Parser) Parse(repo Repository) (*Expr, error) {
   321      return self.expr(0, 0, repo)
   322  }
   323  
   324  // SetSource resets the expression parser and sets the expression source.
   325  func (self *Parser) SetSource(src string) *Parser {
   326      self.pos = 0
   327      self.src = []rune(src)
   328      return self
   329  }