github.com/cloudwego/iasm@v0.2.0/expr/parser.go (about) 1 // 2 // Copyright 2024 CloudWeGo Authors 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 package expr 18 19 import ( 20 `strconv` 21 `unicode` 22 `unsafe` 23 ) 24 25 type _TokenKind uint8 26 27 const ( 28 _T_end _TokenKind = iota + 1 29 _T_int 30 _T_punc 31 _T_name 32 ) 33 34 const ( 35 _OP2 = 0x80 36 _POW = _OP2 | '*' 37 _SHL = _OP2 | '<' 38 _SHR = _OP2 | '>' 39 ) 40 41 type _Slice struct { 42 p unsafe.Pointer 43 n int 44 c int 45 } 46 47 type _Token struct { 48 pos int 49 ptr *rune 50 u64 uint64 51 tag _TokenKind 52 } 53 54 func (self _Token) str() (v string) { 55 return string(self.rbuf()) 56 } 57 58 func (self _Token) rbuf() (v []rune) { 59 (*_Slice)(unsafe.Pointer(&v)).c = int(self.u64) 60 (*_Slice)(unsafe.Pointer(&v)).n = int(self.u64) 61 (*_Slice)(unsafe.Pointer(&v)).p = unsafe.Pointer(self.ptr) 62 return 63 } 64 65 func tokenEnd(p int) _Token { 66 return _Token { 67 pos: p, 68 tag: _T_end, 69 } 70 } 71 72 func tokenInt(p int, v uint64) _Token { 73 return _Token { 74 pos: p, 75 u64: v, 76 tag: _T_int, 77 } 78 } 79 80 func tokenPunc(p int, v rune) _Token { 81 return _Token { 82 pos: p, 83 tag: _T_punc, 84 u64: uint64(v), 85 } 86 } 87 88 func tokenName(p int, v []rune) _Token { 89 return _Token { 90 pos: p, 91 ptr: &v[0], 92 tag: _T_name, 93 u64: uint64(len(v)), 94 } 95 } 96 97 // Repository represents a repository of Term's. 98 type Repository interface { 99 Get(name string) (Term, error) 100 } 101 102 // Parser parses an expression string to it's AST representation. 103 type Parser struct { 104 pos int 105 src []rune 106 } 107 108 var binaryOps = [...]func(*Expr, *Expr) *Expr { 109 '+' : (*Expr).Add, 110 '-' : (*Expr).Sub, 111 '*' : (*Expr).Mul, 112 '/' : (*Expr).Div, 113 '%' : (*Expr).Mod, 114 '&' : (*Expr).And, 115 '^' : (*Expr).Xor, 116 '|' : (*Expr).Or, 117 _SHL : (*Expr).Shl, 118 _SHR : (*Expr).Shr, 119 _POW : (*Expr).Pow, 120 } 121 122 var precedence = [...]map[int]bool { 123 {_SHL: true, _SHR: true}, 124 {'|' : true}, 125 {'^' : true}, 126 {'&' : true}, 127 {'+' : true, '-': true}, 128 {'*' : true, '/': true, '%': true}, 129 {_POW: true}, 130 } 131 132 func (self *Parser) ch() rune { 133 return self.src[self.pos] 134 } 135 136 func (self *Parser) eof() bool { 137 return self.pos >= len(self.src) 138 } 139 140 func (self *Parser) rch() (v rune) { 141 v, self.pos = self.src[self.pos], self.pos + 1 142 return 143 } 144 145 func (self *Parser) hex(ss []rune) bool { 146 if len(ss) == 1 && ss[0] == '0' { 147 return unicode.ToLower(self.ch()) == 'x' 148 } else if len(ss) <= 1 || unicode.ToLower(ss[1]) != 'x' { 149 return unicode.IsDigit(self.ch()) 150 } else { 151 return ishexdigit(self.ch()) 152 } 153 } 154 155 func (self *Parser) int(p int, ss []rune) (_Token, error) { 156 var err error 157 var val uint64 158 159 /* find all the digits */ 160 for !self.eof() && self.hex(ss) { 161 ss = append(ss, self.rch()) 162 } 163 164 /* parse the value */ 165 if val, err = strconv.ParseUint(string(ss), 0, 64); err != nil { 166 return _Token{}, err 167 } else { 168 return tokenInt(p, val), nil 169 } 170 } 171 172 func (self *Parser) name(p int, ss []rune) _Token { 173 for !self.eof() && isident(self.ch()) { ss = append(ss, self.rch()) } 174 return tokenName(p, ss) 175 } 176 177 func (self *Parser) read(p int, ch rune) (_Token, error) { 178 if isdigit(ch) { 179 return self.int(p, []rune { ch }) 180 } else if isident0(ch) { 181 return self.name(p, []rune { ch }), nil 182 } else if isop2ch(ch) && !self.eof() && self.ch() == ch { 183 return tokenPunc(p, _OP2 | self.rch()), nil 184 } else if isop1ch(ch) { 185 return tokenPunc(p, ch), nil 186 } else { 187 return _Token{}, newSyntaxError(self.pos, "invalid character " + strconv.QuoteRuneToASCII(ch)) 188 } 189 } 190 191 func (self *Parser) next() (_Token, error) { 192 for { 193 var p int 194 var c rune 195 196 /* check for EOF */ 197 if self.eof() { 198 return tokenEnd(self.pos), nil 199 } 200 201 /* read the next char */ 202 p = self.pos 203 c = self.rch() 204 205 /* parse the token if not a space */ 206 if !unicode.IsSpace(c) { 207 return self.read(p, c) 208 } 209 } 210 } 211 212 func (self *Parser) grab(tk _Token, repo Repository) (*Expr, error) { 213 if repo == nil { 214 return nil, newSyntaxError(tk.pos, "unresolved symbol: " + tk.str()) 215 } else if term, err := repo.Get(tk.str()); err != nil { 216 return nil, err 217 } else { 218 return Ref(term), nil 219 } 220 } 221 222 func (self *Parser) nest(nest int, repo Repository) (*Expr, error) { 223 var err error 224 var ret *Expr 225 var ntk _Token 226 227 /* evaluate the nested expression */ 228 if ret, err = self.expr(0, nest + 1, repo); err != nil { 229 return nil, err 230 } 231 232 /* must follows with a ')' */ 233 if ntk, err = self.next(); err != nil { 234 return nil, err 235 } else if ntk.tag != _T_punc || ntk.u64 != ')' { 236 return nil, newSyntaxError(ntk.pos, "')' expected") 237 } else { 238 return ret, nil 239 } 240 } 241 242 func (self *Parser) unit(nest int, repo Repository) (*Expr, error) { 243 if tk, err := self.next(); err != nil { 244 return nil, err 245 } else if tk.tag == _T_int { 246 return Int(int64(tk.u64)), nil 247 } else if tk.tag == _T_name { 248 return self.grab(tk, repo) 249 } else if tk.tag == _T_punc && tk.u64 == '(' { 250 return self.nest(nest, repo) 251 } else if tk.tag == _T_punc && tk.u64 == '+' { 252 return self.unit(nest, repo) 253 } else if tk.tag == _T_punc && tk.u64 == '-' { 254 return neg2(self.unit(nest, repo)) 255 } else if tk.tag == _T_punc && tk.u64 == '~' { 256 return not2(self.unit(nest, repo)) 257 } else { 258 return nil, newSyntaxError(tk.pos, "integer, unary operator or nested expression expected") 259 } 260 } 261 262 func (self *Parser) term(prec int, nest int, repo Repository) (*Expr, error) { 263 var err error 264 var val *Expr 265 266 /* parse the LHS operand */ 267 if val, err = self.expr(prec + 1, nest, repo); err != nil { 268 return nil, err 269 } 270 271 /* parse all the operators of the same precedence */ 272 for { 273 var op int 274 var rv *Expr 275 var tk _Token 276 277 /* peek the next token */ 278 pp := self.pos 279 tk, err = self.next() 280 281 /* check for errors */ 282 if err != nil { 283 return nil, err 284 } 285 286 /* encountered EOF */ 287 if tk.tag == _T_end { 288 return val, nil 289 } 290 291 /* must be an operator */ 292 if tk.tag != _T_punc { 293 return nil, newSyntaxError(tk.pos, "operators expected") 294 } 295 296 /* check for the operator precedence */ 297 if op = int(tk.u64); !precedence[prec][op] { 298 self.pos = pp 299 return val, nil 300 } 301 302 /* evaluate the RHS operand, and combine the value */ 303 if rv, err = self.expr(prec + 1, nest, repo); err != nil { 304 return nil, err 305 } else { 306 val = binaryOps[op](val, rv) 307 } 308 } 309 } 310 311 func (self *Parser) expr(prec int, nest int, repo Repository) (*Expr, error) { 312 if prec >= len(precedence) { 313 return self.unit(nest, repo) 314 } else { 315 return self.term(prec, nest, repo) 316 } 317 } 318 319 // Parse parses the expression, and returns it's AST tree. 320 func (self *Parser) Parse(repo Repository) (*Expr, error) { 321 return self.expr(0, 0, repo) 322 } 323 324 // SetSource resets the expression parser and sets the expression source. 325 func (self *Parser) SetSource(src string) *Parser { 326 self.pos = 0 327 self.src = []rune(src) 328 return self 329 }