github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/parsing/parser.go (about) 1 package parsing 2 3 import ( 4 "errors" 5 "fmt" 6 7 "github.com/arnodel/golua/luastrings" 8 "github.com/arnodel/golua/ops" 9 "github.com/arnodel/golua/token" 10 11 "github.com/arnodel/golua/ast" 12 ) 13 14 // Parser can parse lua statements or expressions 15 type Parser struct { 16 scanner Scanner 17 } 18 19 type Scanner interface { 20 Scan() *token.Token 21 ErrorMsg() string 22 } 23 24 type Error struct { 25 Got *token.Token 26 Expected string 27 } 28 29 func (e Error) Error() string { 30 expected := e.Expected 31 if e.Got.Type == token.INVALID { 32 expected = "invalid token: " + expected 33 } else if e.Got.Type == token.UNFINISHED { 34 expected = "unexpected <eof>" 35 } else if expected == "" { 36 expected = "unexpected symbol" 37 } else { 38 expected = "expected " + expected 39 } 40 var tok string 41 if e.Got.Type == token.EOF { 42 tok = "<eof>" 43 } else { 44 tok = luastrings.Quote(string(e.Got.Lit), '\'') 45 } 46 return fmt.Sprintf("%d:%d: %s near %s", e.Got.Line, e.Got.Column, expected, tok) 47 } 48 49 // ParseExp takes in a function that returns tokens and builds an ExpNode for it 50 // (or returns an error). 51 func ParseExp(scanner Scanner) (exp ast.ExpNode, err error) { 52 defer func() { 53 if r := recover(); r != nil { 54 exp = nil 55 var ok bool 56 err, ok = r.(error) 57 if !ok { 58 err = errors.New("Unknown error") 59 } 60 } 61 }() 62 parser := &Parser{scanner} 63 var t *token.Token 64 exp, t = parser.Exp(parser.Scan()) 65 expectType(t, token.EOF, "<eof>") 66 return 67 } 68 69 // ParseChunk takes in a function that returns tokens and builds a BlockStat for it 70 // (or returns an error). 71 func ParseChunk(scanner Scanner) (stat ast.BlockStat, err error) { 72 defer func() { 73 if r := recover(); r != nil { 74 stat = ast.BlockStat{} 75 var ok bool 76 err, ok = r.(error) 77 if !ok { 78 err = errors.New("Unknown error") 79 } 80 } 81 }() 82 parser := &Parser{scanner} 83 var t *token.Token 84 stat, t = parser.Block(parser.Scan()) 85 expectType(t, token.EOF, "<eof>") 86 return 87 } 88 89 // Scan returns the next token. 90 func (p *Parser) Scan() *token.Token { 91 tok := p.scanner.Scan() 92 if tok.Type == token.INVALID { 93 panic(Error{Got: tok, Expected: p.scanner.ErrorMsg()}) 94 } 95 return tok 96 } 97 98 // Stat parses any statement. 99 func (p *Parser) Stat(t *token.Token) (ast.Stat, *token.Token) { 100 switch t.Type { 101 case token.SgSemicolon: 102 return ast.NewEmptyStat(t), p.Scan() 103 case token.KwBreak: 104 return ast.NewBreakStat(t), p.Scan() 105 case token.KwGoto: 106 dest := p.Scan() 107 expectIdent(dest) 108 return ast.NewGotoStat(t, ast.NewName(dest)), p.Scan() 109 case token.KwDo: 110 stat, closer := p.Block(p.Scan()) 111 expectType(closer, token.KwEnd, "'end'") 112 return stat, p.Scan() 113 case token.KwWhile: 114 cond, doTok := p.Exp(p.Scan()) 115 expectType(doTok, token.KwDo, "'do'") 116 body, endTok := p.Block(p.Scan()) 117 expectType(endTok, token.KwEnd, "'end'") 118 return ast.NewWhileStat(t, endTok, cond, body), p.Scan() 119 case token.KwRepeat: 120 body, untilTok := p.Block(p.Scan()) 121 expectType(untilTok, token.KwUntil, "'until'") 122 cond, next := p.Exp(p.Scan()) 123 return ast.NewRepeatStat(t, body, cond), next 124 case token.KwIf: 125 return p.If(t) 126 case token.KwFor: 127 return p.For(t) 128 case token.KwFunction: 129 return p.FunctionStat(t) 130 case token.KwLocal: 131 return p.Local(t) 132 case token.SgDoubleColon: 133 name, t := p.Name(p.Scan()) 134 expectType(t, token.SgDoubleColon, "'::'") 135 return ast.NewLabelStat(name), p.Scan() 136 default: 137 var exp ast.ExpNode 138 exp, t = p.PrefixExp(t) 139 switch e := exp.(type) { 140 case ast.Stat: 141 // This is a function call 142 return e, t 143 case ast.Var: 144 // This should be the start of 'varlist = explist' 145 vars := []ast.Var{e} 146 var pexp ast.ExpNode 147 for t.Type == token.SgComma { 148 pexp, t = p.PrefixExp(p.Scan()) 149 if v, ok := pexp.(ast.Var); ok { 150 vars = append(vars, v) 151 } else { 152 tokenError(t, "expected variable") 153 } 154 } 155 expectType(t, token.SgAssign, "'='") 156 exps, t := p.ExpList(p.Scan()) 157 return ast.NewAssignStat(vars, exps), t 158 default: 159 tokenError(t, "") 160 } 161 } 162 return nil, nil 163 } 164 165 // If parses an if / then / else statement. It assumes that t is the "if" 166 // token. 167 func (p *Parser) If(t *token.Token) (ast.IfStat, *token.Token) { 168 cond, thenTok := p.Exp(p.Scan()) 169 expectType(thenTok, token.KwThen, "'then'") 170 thenBlock, endTok := p.Block(p.Scan()) 171 ifStat := ast.NewIfStat(t, cond, thenBlock) 172 for { 173 switch endTok.Type { 174 case token.KwElseIf: 175 cond, thenTok = p.Exp(p.Scan()) 176 expectType(thenTok, token.KwThen, "'then'") 177 thenBlock, endTok = p.Block(p.Scan()) 178 ifStat = ifStat.AddElseIf(cond, thenBlock) 179 case token.KwEnd: 180 return ifStat, p.Scan() 181 case token.KwElse: 182 elseBlock, elseTok := p.Block(p.Scan()) 183 expectType(elseTok, token.KwEnd, "'end'") 184 ifStat = ifStat.WithElse(endTok, elseBlock) 185 return ifStat, p.Scan() 186 default: 187 tokenError(t, "'elseif' or 'end' or 'else'") 188 } 189 } 190 } 191 192 // For parses a for in / for = statement. It assumes that t is the "for" token. 193 func (p *Parser) For(t *token.Token) (ast.Stat, *token.Token) { 194 name, nextTok := p.Name(p.Scan()) 195 if nextTok.Type == token.SgAssign { 196 // Parse for Name = ... 197 params := make([]ast.ExpNode, 3) 198 params[0], nextTok = p.Exp(p.Scan()) 199 expectType(nextTok, token.SgComma, "','") 200 params[1], nextTok = p.Exp(p.Scan()) 201 if nextTok.Type == token.SgComma { 202 params[2], nextTok = p.Exp(p.Scan()) 203 } else { 204 params[2] = ast.NewInt(1) 205 } 206 expectType(nextTok, token.KwDo, "'do'") 207 body, endTok := p.Block(p.Scan()) 208 expectType(endTok, token.KwEnd, "'end'") 209 forStat := ast.NewForStat(t, endTok, name, params, body) 210 return forStat, p.Scan() 211 } 212 // Parse for namelist in explist ... 213 names := []ast.Name{name} 214 for nextTok.Type == token.SgComma { 215 name, nextTok = p.Name(p.Scan()) 216 names = append(names, name) 217 } 218 expected := "'in'" 219 if len(names) == 1 { 220 expected = "'=' or 'in'" 221 } 222 expectType(nextTok, token.KwIn, expected) 223 exp, nextTok := p.Exp(p.Scan()) 224 params := []ast.ExpNode{exp} 225 for nextTok.Type == token.SgComma { 226 exp, nextTok = p.Exp(p.Scan()) 227 params = append(params, exp) 228 } 229 expectType(nextTok, token.KwDo, "'do'") 230 body, endTok := p.Block(p.Scan()) 231 expectType(endTok, token.KwEnd, "'end'") 232 forInStat := ast.NewForInStat(t, endTok, names, params, body) 233 return forInStat, p.Scan() 234 235 } 236 237 // Local parses a "local" statement (function definition of variable 238 // declaration). It assumes that t is the "local" token. 239 func (p *Parser) Local(*token.Token) (ast.Stat, *token.Token) { 240 t := p.Scan() 241 if t.Type == token.KwFunction { 242 name, t := p.Name(p.Scan()) 243 fx, t := p.FunctionDef(t) 244 return ast.NewLocalFunctionStat(name, fx), t 245 } 246 // local namelist ['=' explist] 247 nameAttrib, t := p.NameAttrib(t) 248 nameAttribs := []ast.NameAttrib{nameAttrib} 249 for t.Type == token.SgComma { 250 nameAttrib, t = p.NameAttrib(p.Scan()) 251 nameAttribs = append(nameAttribs, nameAttrib) 252 } 253 var values []ast.ExpNode 254 if t.Type == token.SgAssign { 255 values, t = p.ExpList(p.Scan()) 256 } 257 return ast.NewLocalStat(nameAttribs, values), t 258 } 259 260 // FunctionStat parses a function definition statement. It assumes that t is the 261 // "function" token. 262 func (p *Parser) FunctionStat(*token.Token) (ast.Stat, *token.Token) { 263 name, t := p.Name(p.Scan()) 264 var v ast.Var = name 265 var method ast.Name 266 for t.Type == token.SgDot { 267 name, t = p.Name(p.Scan()) 268 v = ast.NewIndexExp(v, name.AstString()) 269 } 270 if t.Type == token.SgColon { 271 method, t = p.Name(p.Scan()) 272 } 273 fx, t := p.FunctionDef(t) 274 return ast.NewFunctionStat(v, method, fx), t 275 } 276 277 // Block parses a block whose starting token (e.g. "do") has already been 278 // consumed. Returns the token that closes the block (e.g. "end"). So the caller 279 // should check that this is the right kind of closing token. 280 func (p *Parser) Block(t *token.Token) (ast.BlockStat, *token.Token) { 281 var stats []ast.Stat 282 var next ast.Stat 283 for { 284 switch t.Type { 285 case token.KwReturn: 286 ret, t := p.Return(t) 287 return ast.NewBlockStat(stats, ret), t 288 case token.KwEnd, token.KwElse, token.KwElseIf, token.KwUntil, token.EOF: 289 return ast.NewBlockStat(stats, nil), t 290 default: 291 next, t = p.Stat(t) 292 stats = append(stats, next) 293 } 294 } 295 } 296 297 // Return parses a return statement. 298 func (p *Parser) Return(*token.Token) ([]ast.ExpNode, *token.Token) { 299 t := p.Scan() 300 switch t.Type { 301 case token.SgSemicolon: 302 return []ast.ExpNode{}, p.Scan() 303 case token.KwEnd, token.KwElse, token.KwElseIf, token.KwUntil, token.EOF: 304 return []ast.ExpNode{}, t 305 default: 306 exps, t := p.ExpList(t) 307 if t.Type == token.SgSemicolon { 308 t = p.Scan() 309 } 310 return exps, t 311 } 312 } 313 314 type item struct { 315 exp ast.ExpNode 316 op ops.Op 317 tok *token.Token 318 } 319 320 func mergepop(stack []item, it item) ([]item, item) { 321 i := len(stack) - 1 322 top := stack[i] 323 top.exp = ast.NewBinOp(top.exp, it.op, it.tok, it.exp) 324 return stack[:i], top 325 } 326 327 // Exp parses any expression. 328 func (p *Parser) Exp(t *token.Token) (ast.ExpNode, *token.Token) { 329 var exp ast.ExpNode 330 exp, t = p.ShortExp(t) 331 var op ops.Op 332 var opTok *token.Token 333 var stack []item 334 last := item{exp: exp} 335 for t.Type.IsBinOp() { 336 op = binopMap[t.Type] 337 opTok = t 338 exp, t = p.ShortExp(p.Scan()) 339 for len(stack) > 0 { 340 pdiff := op.Precedence() - last.op.Precedence() 341 if pdiff > 0 || (pdiff == 0 && op == ops.OpConcat) { 342 break 343 } 344 stack, last = mergepop(stack, last) 345 } 346 stack = append(stack, last) 347 last = item{exp: exp, op: op, tok: opTok} 348 } 349 // We are left with a stack of strictly increasing precedence 350 for len(stack) > 0 { 351 stack, last = mergepop(stack, last) 352 } 353 return last.exp, t 354 } 355 356 // ShortExp parses an expression which is either atomic, a unary operation, a 357 // prefix expression or a power operation (right associatively composed). In 358 // other words, any expression that doesn't contain a binary operator. 359 func (p *Parser) ShortExp(t *token.Token) (ast.ExpNode, *token.Token) { 360 var exp ast.ExpNode 361 switch t.Type { 362 case token.KwNil: 363 exp, t = ast.NewNil(t), p.Scan() 364 case token.KwTrue: 365 exp, t = ast.True(t), p.Scan() 366 case token.KwFalse: 367 exp, t = ast.False(t), p.Scan() 368 case token.NUMDEC, token.NUMHEX: 369 n, err := ast.NewNumber(t) 370 if err != nil { 371 panic(err) 372 } 373 exp, t = n, p.Scan() 374 case token.STRING: 375 s, err := ast.NewString(t) 376 if err != nil { 377 panic(err) 378 } 379 exp, t = s, p.Scan() 380 case token.LONGSTRING: 381 exp, t = ast.NewLongString(t), p.Scan() 382 case token.SgOpenBrace: 383 exp, t = p.TableConstructor(t) 384 case token.SgEtc: 385 exp, t = ast.NewEtc(t), p.Scan() 386 case token.KwFunction: 387 exp, t = p.FunctionDef(p.Scan()) 388 case token.SgMinus, token.KwNot, token.SgHash, token.SgTilde: 389 // A unary operator! 390 opTok := t 391 exp, t = p.ShortExp(p.Scan()) 392 exp = ast.NewUnOp(opTok, unopMap[opTok.Type], exp) 393 default: 394 exp, t = p.PrefixExp(t) 395 } 396 if t.Type == token.SgHat { 397 var pow ast.ExpNode 398 pow, t = p.ShortExp(p.Scan()) 399 exp = ast.NewBinOp(exp, ops.OpPow, t, pow) 400 } 401 return exp, t 402 } 403 404 var unopMap = map[token.Type]ops.Op{ 405 token.SgMinus: ops.OpNeg, 406 token.KwNot: ops.OpNot, 407 token.SgHash: ops.OpLen, 408 token.SgTilde: ops.OpBitNot, 409 } 410 411 var binopMap = map[token.Type]ops.Op{ 412 token.KwOr: ops.OpOr, 413 token.KwAnd: ops.OpAnd, 414 415 token.SgLess: ops.OpLt, 416 token.SgLessEqual: ops.OpLeq, 417 token.SgGreater: ops.OpGt, 418 token.SgGreaterEqual: ops.OpGeq, 419 token.SgEqual: ops.OpEq, 420 token.SgNotEqual: ops.OpNeq, 421 422 token.SgPipe: ops.OpBitOr, 423 token.SgTilde: ops.OpBitXor, 424 token.SgAmpersand: ops.OpBitAnd, 425 426 token.SgShiftLeft: ops.OpShiftL, 427 token.SgShiftRight: ops.OpShiftR, 428 429 token.SgConcat: ops.OpConcat, 430 431 token.SgPlus: ops.OpAdd, 432 token.SgMinus: ops.OpSub, 433 434 token.SgStar: ops.OpMul, 435 token.SgSlash: ops.OpDiv, 436 token.SgSlashSlash: ops.OpFloorDiv, 437 token.SgPct: ops.OpMod, 438 439 token.SgHat: ops.OpPow, 440 } 441 442 // FunctionDef parses a function definition expression. 443 func (p *Parser) FunctionDef(startTok *token.Token) (ast.Function, *token.Token) { 444 expectType(startTok, token.SgOpenBkt, "'('") 445 t := p.Scan() 446 var names []ast.Name 447 hasEtc := false 448 ParamsLoop: 449 for { 450 switch t.Type { 451 case token.IDENT: 452 names = append(names, ast.NewName(t)) 453 t = p.Scan() 454 if t.Type != token.SgComma { 455 break ParamsLoop 456 } 457 t = p.Scan() 458 case token.SgEtc: 459 hasEtc = true 460 t = p.Scan() 461 break ParamsLoop 462 case token.SgCloseBkt: 463 break ParamsLoop 464 default: 465 tokenError(t, "") 466 } 467 } 468 expectType(t, token.SgCloseBkt, "')'") 469 body, endTok := p.Block(p.Scan()) 470 expectType(endTok, token.KwEnd, "'end'") 471 def := ast.NewFunction(startTok, endTok, ast.NewParList(names, hasEtc), body) 472 return def, p.Scan() 473 } 474 475 // PrefixExp parses an expression made of a name or and expression in brackets 476 // followed by zero or more indexing operations or function applications. 477 func (p *Parser) PrefixExp(t *token.Token) (ast.ExpNode, *token.Token) { 478 var exp ast.ExpNode 479 switch t.Type { 480 case token.SgOpenBkt: 481 exp, t = p.Exp(p.Scan()) 482 if f, ok := exp.(ast.FunctionCall); ok { 483 exp = f.InBrackets() 484 } 485 expectType(t, token.SgCloseBkt, "')'") 486 case token.IDENT: 487 exp = ast.NewName(t) 488 default: 489 tokenError(t, "") 490 } 491 t = p.Scan() 492 for { 493 switch t.Type { 494 case token.SgOpenSquareBkt: 495 var idxExp ast.ExpNode 496 idxExp, t = p.Exp(p.Scan()) 497 expectType(t, token.SgCloseSquareBkt, "']'") 498 t = p.Scan() 499 exp = ast.NewIndexExp(exp, idxExp) 500 case token.SgDot: 501 var name ast.Name 502 name, t = p.Name(p.Scan()) 503 exp = ast.NewIndexExp(exp, name.AstString()) 504 case token.SgColon: 505 var name ast.Name 506 var args []ast.ExpNode 507 name, t = p.Name(p.Scan()) 508 args, t = p.Args(t) 509 if args == nil { 510 tokenError(t, "expected function arguments") 511 } 512 exp = ast.NewFunctionCall(exp, name, args) 513 default: 514 var args []ast.ExpNode 515 args, t = p.Args(t) 516 if args == nil { 517 return exp, t 518 } 519 exp = ast.NewFunctionCall(exp, ast.Name{}, args) 520 } 521 } 522 } 523 524 // Args parses the arguments of a function call. It returns nil rather than 525 // panicking if it couldn't parse arguments. 526 func (p *Parser) Args(t *token.Token) ([]ast.ExpNode, *token.Token) { 527 switch t.Type { 528 case token.SgOpenBkt: 529 t = p.Scan() 530 if t.Type == token.SgCloseBkt { 531 return []ast.ExpNode{}, p.Scan() 532 } 533 args, t := p.ExpList(t) 534 expectType(t, token.SgCloseBkt, "')'") 535 return args, p.Scan() 536 case token.SgOpenBrace: 537 arg, t := p.TableConstructor(t) 538 return []ast.ExpNode{arg}, t 539 case token.STRING: 540 arg, err := ast.NewString(t) 541 if err != nil { 542 panic(err) 543 } 544 return []ast.ExpNode{arg}, p.Scan() 545 case token.LONGSTRING: 546 return []ast.ExpNode{ast.NewLongString(t)}, p.Scan() 547 } 548 return nil, t 549 } 550 551 // ExpList parses a comma separated list of expressions. 552 func (p *Parser) ExpList(t *token.Token) ([]ast.ExpNode, *token.Token) { 553 var exp ast.ExpNode 554 exp, t = p.Exp(t) 555 exps := []ast.ExpNode{exp} 556 for t.Type == token.SgComma { 557 exp, t = p.Exp(p.Scan()) 558 exps = append(exps, exp) 559 } 560 return exps, t 561 } 562 563 // TableConstructor parses a table constructor. 564 func (p *Parser) TableConstructor(opTok *token.Token) (ast.TableConstructor, *token.Token) { 565 t := p.Scan() 566 var fields []ast.TableField 567 if t.Type != token.SgCloseBrace { 568 var field ast.TableField 569 field, t = p.Field(t) 570 fields = []ast.TableField{field} 571 for t.Type == token.SgComma || t.Type == token.SgSemicolon { 572 t = p.Scan() 573 if t.Type == token.SgCloseBrace { 574 break 575 } 576 field, t = p.Field(t) 577 fields = append(fields, field) 578 } 579 } 580 expectType(t, token.SgCloseBrace, "'}'") 581 return ast.NewTableConstructor(opTok, t, fields), p.Scan() 582 } 583 584 // Field parses a table constructor field. 585 func (p *Parser) Field(t *token.Token) (ast.TableField, *token.Token) { 586 var key ast.ExpNode = ast.NoTableKey{} 587 var val ast.ExpNode 588 if t.Type == token.SgOpenSquareBkt { 589 key, t = p.Exp(p.Scan()) 590 expectType(t, token.SgCloseSquareBkt, "']'") 591 expectType(p.Scan(), token.SgAssign, "'='") 592 val, t = p.Exp(p.Scan()) 593 } else { 594 val, t = p.Exp(t) 595 if t.Type == token.SgAssign { 596 if name, ok := val.(ast.Name); !ok { 597 tokenError(t, "") 598 } else { 599 key = name.AstString() 600 val, t = p.Exp(p.Scan()) 601 } 602 } 603 } 604 return ast.NewTableField(key, val), t 605 } 606 607 // Name parses a name. 608 func (p *Parser) Name(t *token.Token) (ast.Name, *token.Token) { 609 expectIdent(t) 610 return ast.NewName(t), p.Scan() 611 } 612 613 func (p *Parser) NameAttrib(t *token.Token) (ast.NameAttrib, *token.Token) { 614 name, t := p.Name(t) 615 attrib := ast.NoAttrib 616 var attribName *ast.Name 617 if t.Type == token.SgLess { 618 attribTok := p.Scan() 619 attribName = new(ast.Name) 620 *attribName, t = p.Name(attribTok) 621 switch attribName.Val { 622 case "const": 623 attrib = ast.ConstAttrib 624 case "close": 625 attrib = ast.CloseAttrib 626 default: 627 tokenError(attribTok, "'const' or 'close'") 628 } 629 expectType(t, token.SgGreater, "'>'") 630 t = p.Scan() 631 } 632 return ast.NewNameAttrib(name, attribName, attrib), t 633 } 634 635 func expectIdent(t *token.Token) { 636 expectType(t, token.IDENT, "name") 637 } 638 639 func expectType(t *token.Token, tp token.Type, expected string) { 640 if t.Type != tp { 641 panic(Error{Got: t, Expected: expected}) 642 } 643 } 644 645 func tokenError(t *token.Token, expected string) { 646 panic(Error{Got: t, Expected: expected}) 647 }