github.com/hattya/go.sh@v0.0.0-20240328132134-f53276d95cc6/interp/lexer.go (about) 1 // 2 // go.sh/interp :: lexer.go 3 // 4 // Copyright (c) 2021 Akinori Hattori <hattya@gmail.com> 5 // 6 // SPDX-License-Identifier: MIT 7 // 8 9 //go:generate goyacc -l -o arith.go arith.go.y 10 11 package interp 12 13 import ( 14 "fmt" 15 "io" 16 "strings" 17 "sync" 18 "unicode" 19 ) 20 21 var ops = map[int]string{ 22 '(': "(", 23 ')': ")", 24 INC: "++", 25 DEC: "--", 26 '+': "+", 27 '-': "-", 28 '~': "~", 29 '!': "!", 30 '*': "*", 31 '/': "/", 32 '%': "%", 33 LSH: "<<", 34 RSH: ">>", 35 '<': "<", 36 '>': ">", 37 LE: "<=", 38 GE: ">=", 39 EQ: "==", 40 NE: "!=", 41 '&': "&", 42 '^': "^", 43 '|': "|", 44 LAND: "&&", 45 LOR: "||", 46 '?': "?", 47 ':': ":", 48 '=': "=", 49 MUL_ASSIGN: "*=", 50 DIV_ASSIGN: "/=", 51 MOD_ASSIGN: "%=", 52 ADD_ASSIGN: "+=", 53 SUB_ASSIGN: "-=", 54 LSH_ASSIGN: "<<=", 55 RSH_ASSIGN: ">>=", 56 AND_ASSIGN: "&=", 57 XOR_ASSIGN: "^=", 58 OR_ASSIGN: "|=", 59 } 60 61 type lexer struct { 62 env *ExecEnv 63 r io.RuneScanner 64 n int 65 token chan interface{} 66 67 mu sync.Mutex 68 err error 69 cancel chan struct{} 70 71 b strings.Builder 72 } 73 74 func newLexer(env *ExecEnv, r io.RuneScanner) *lexer { 75 l := &lexer{ 76 env: env, 77 r: r, 78 token: make(chan interface{}), 79 cancel: make(chan struct{}), 80 } 81 go l.run() 82 return l 83 } 84 85 func (l *lexer) Lex(lval *yySymType) int { 86 switch tok := (<-l.token).(type) { 87 case token: 88 lval.expr.s = tok.val 89 return tok.typ 90 case int: 91 lval.op = ops[tok] 92 return tok 93 } 94 return 0 95 } 96 97 func (l *lexer) run() { 98 defer func() { 99 close(l.token) 100 101 if e := recover(); e != nil { 102 // re-panic 103 panic(e) 104 } 105 }() 106 107 for action := l.lexToken; action != nil; { 108 action = action() 109 } 110 } 111 112 func (l *lexer) lexToken() action { 113 Read: 114 r, err := l.read() 115 if err != nil { 116 return nil 117 } 118 switch r { 119 case ' ', '\t', '\n': 120 goto Read 121 } 122 l.unread() 123 124 switch { 125 case '0' <= r && r <= '9': 126 return l.lexNumber 127 case r == '_' || unicode.IsLetter(r): 128 return l.lexIdent 129 } 130 return l.lexOp 131 } 132 133 func (l *lexer) lexNumber() action { 134 r, _ := l.read() 135 l.b.WriteRune(r) 136 var hex bool 137 if r == '0' { 138 r, err := l.read() 139 switch { 140 case err != nil: 141 goto Number 142 case r == 'X' || r == 'x': 143 hex = true 144 case r < '0' || '9' < r: 145 l.unread() 146 goto Number 147 } 148 l.b.WriteRune(r) 149 } 150 151 for { 152 r, err := l.read() 153 switch { 154 case err != nil: 155 goto Number 156 case '0' <= r && r <= '9' || hex && ('A' <= r && r <= 'Z' || 'a' <= r && r <= 'z'): 157 l.b.WriteRune(r) 158 default: 159 l.unread() 160 goto Number 161 } 162 } 163 Number: 164 l.emit(NUMBER) 165 return l.lexToken 166 } 167 168 func (l *lexer) lexIdent() action { 169 for { 170 r, err := l.read() 171 switch { 172 case err != nil: 173 goto Ident 174 case r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r): 175 l.b.WriteRune(r) 176 default: 177 l.unread() 178 goto Ident 179 } 180 } 181 Ident: 182 l.emit(IDENT) 183 return l.lexToken 184 } 185 186 func (l *lexer) lexOp() action { 187 var op int 188 switch r, _ := l.read(); r { 189 case '(', ')', '~', '?', ':': 190 op = int(r) 191 case '+': 192 op = '+' 193 if r, err := l.read(); err == nil { 194 switch r { 195 case '+': 196 op = INC 197 case '=': 198 op = ADD_ASSIGN 199 default: 200 l.unread() 201 } 202 } 203 case '-': 204 op = '-' 205 if r, err := l.read(); err == nil { 206 switch r { 207 case '-': 208 op = DEC 209 case '=': 210 op = SUB_ASSIGN 211 default: 212 l.unread() 213 } 214 } 215 case '!': 216 op = '!' 217 if r, err := l.read(); err == nil { 218 if r == '=' { 219 op = NE 220 } else { 221 l.unread() 222 } 223 } 224 case '*': 225 op = '*' 226 if r, err := l.read(); err == nil { 227 if r == '=' { 228 op = MUL_ASSIGN 229 } else { 230 l.unread() 231 } 232 } 233 case '/': 234 op = '/' 235 if r, err := l.read(); err == nil { 236 if r == '=' { 237 op = DIV_ASSIGN 238 } else { 239 l.unread() 240 } 241 } 242 case '%': 243 op = '%' 244 if r, err := l.read(); err == nil { 245 if r == '=' { 246 op = MOD_ASSIGN 247 } else { 248 l.unread() 249 } 250 } 251 case '<': 252 op = '<' 253 if r, err := l.read(); err == nil { 254 switch r { 255 case '<': 256 op = LSH 257 if r, err := l.read(); err == nil { 258 if r == '=' { 259 op = LSH_ASSIGN 260 } else { 261 l.unread() 262 } 263 } 264 case '=': 265 op = LE 266 default: 267 l.unread() 268 } 269 } 270 case '>': 271 op = '>' 272 if r, err := l.read(); err == nil { 273 switch r { 274 case '>': 275 op = RSH 276 if r, err := l.read(); err == nil { 277 if r == '=' { 278 op = RSH_ASSIGN 279 } else { 280 l.unread() 281 } 282 } 283 case '=': 284 op = GE 285 default: 286 l.unread() 287 } 288 } 289 case '=': 290 op = '=' 291 if r, err := l.read(); err == nil { 292 if r == '=' { 293 op = EQ 294 } else { 295 l.unread() 296 } 297 } 298 case '&': 299 op = '&' 300 if r, err := l.read(); err == nil { 301 switch r { 302 case '&': 303 op = LAND 304 case '=': 305 op = AND_ASSIGN 306 default: 307 l.unread() 308 } 309 } 310 case '^': 311 op = '^' 312 if r, err := l.read(); err == nil { 313 if r == '=' { 314 op = XOR_ASSIGN 315 } else { 316 l.unread() 317 } 318 } 319 case '|': 320 op = '|' 321 if r, err := l.read(); err == nil { 322 switch r { 323 case '|': 324 op = LOR 325 case '=': 326 op = OR_ASSIGN 327 default: 328 l.unread() 329 } 330 } 331 default: 332 l.Error(fmt.Sprintf("unexpected %q", r)) 333 return nil 334 } 335 l.emit(op) 336 return l.lexToken 337 } 338 339 func (l *lexer) emit(typ int) { 340 var tok interface{} 341 switch typ { 342 case NUMBER, IDENT: 343 tok = token{ 344 typ: typ, 345 val: l.b.String(), 346 } 347 l.b.Reset() 348 default: 349 tok = typ 350 } 351 select { 352 case l.token <- tok: 353 case <-l.cancel: 354 // bailout 355 panic(nil) 356 } 357 } 358 359 func (l *lexer) read() (rune, error) { 360 r, _, err := l.r.ReadRune() 361 return r, err 362 } 363 364 func (l *lexer) unread() { 365 l.r.UnreadRune() 366 } 367 368 func (l *lexer) Error(s string) { 369 l.mu.Lock() 370 defer l.mu.Unlock() 371 372 switch { 373 case strings.HasPrefix(s, "syntax error: "): 374 s = s[14:] 375 if l.err != nil && s == "unexpected EOF" { 376 return // lexing was interrupted 377 } 378 case strings.HasPrefix(s, "runtime error: "): 379 s = s[15:] 380 } 381 l.err = ArithExprError{Msg: s} 382 383 select { 384 case <-l.cancel: 385 default: 386 close(l.cancel) 387 } 388 } 389 390 type action func() action 391 392 type token struct { 393 typ int 394 val string 395 } 396 397 // ArithExprError represents an arithmetic expression error. 398 type ArithExprError struct { 399 Expr string 400 Msg string 401 } 402 403 func (e ArithExprError) Error() string { 404 if e.Expr != "" { 405 return fmt.Sprintf("%v: %v", e.Expr, e.Msg) 406 } 407 return e.Msg 408 }