github.phpd.cn/thought-machine/please@v12.2.0+incompatible/src/parse/asp/grammar_parse.go (about)

     1  package asp
     2  
     3  import (
     4  	"io"
     5  	"reflect"
     6  	"strconv"
     7  	"strings"
     8  )
     9  
// parser holds the state shared by the parse* methods in this file; at present
// that is only the lexer they pull tokens from.
type parser struct {
	l *lex // token source; set from newLexer in parseFileInput
}
    13  
    14  // parseFileInput is the only external entry point to this class, it parses a file into a FileInput structure.
    15  func parseFileInput(r io.Reader) (input *FileInput, err error) {
    16  	// The rest of the parser functions signal unhappiness by panicking, we
    17  	// recover any such failures here and convert to an error.
    18  	defer func() {
    19  		if r := recover(); r != nil {
    20  			err = r.(error)
    21  		}
    22  	}()
    23  
    24  	p := &parser{l: newLexer(r)}
    25  	input = &FileInput{}
    26  	for tok := p.l.Peek(); tok.Type != EOF; tok = p.l.Peek() {
    27  		input.Statements = append(input.Statements, p.parseStatement())
    28  	}
    29  	return input, nil
    30  }
    31  
    32  func (p *parser) assert(condition bool, pos Token, message string, args ...interface{}) {
    33  	if !condition {
    34  		p.fail(pos, message, args...)
    35  	}
    36  }
    37  
    38  func (p *parser) assertTokenType(tok Token, expectedType rune) {
    39  	if tok.Type != expectedType {
    40  		p.fail(tok, "unexpected token %s, expected %s", tok, reverseSymbol(expectedType))
    41  	}
    42  }
    43  
    44  func (p *parser) next(expectedType rune) Token {
    45  	tok := p.l.Next()
    46  	p.assertTokenType(tok, expectedType)
    47  	return tok
    48  }
    49  
    50  func (p *parser) nextv(expectedValue string) Token {
    51  	tok := p.l.Next()
    52  	if tok.Value != expectedValue {
    53  		p.fail(tok, "unexpected token %s, expected %s", tok, expectedValue)
    54  	}
    55  	return tok
    56  }
    57  
    58  func (p *parser) optional(option rune) bool {
    59  	if tok := p.l.Peek(); tok.Type == option {
    60  		p.l.Next()
    61  		return true
    62  	}
    63  	return false
    64  }
    65  
    66  func (p *parser) optionalv(option string) bool {
    67  	if tok := p.l.Peek(); tok.Value == option {
    68  		p.l.Next()
    69  		return true
    70  	}
    71  	return false
    72  }
    73  
    74  func (p *parser) anythingBut(r rune) bool {
    75  	return p.l.Peek().Type != r
    76  }
    77  
    78  func (p *parser) oneof(expectedTypes ...rune) Token {
    79  	tok := p.l.Next()
    80  	for _, t := range expectedTypes {
    81  		if tok.Type == t {
    82  			return tok
    83  		}
    84  	}
    85  	p.fail(tok, "unexpected token %s, expected one of %s", tok.Value, strings.Join(reverseSymbols(expectedTypes), " "))
    86  	return Token{}
    87  }
    88  
    89  func (p *parser) oneofval(expectedValues ...string) Token {
    90  	tok := p.l.Next()
    91  	for _, v := range expectedValues {
    92  		if tok.Value == v {
    93  			return tok
    94  		}
    95  	}
    96  	p.fail(tok, "unexpected token %s, expected one of %s", tok.Value, strings.Join(expectedValues, ", "))
    97  	return Token{}
    98  }
    99  
   100  func (p *parser) fail(pos Token, message string, args ...interface{}) {
   101  	fail(pos.Pos, message, args...)
   102  }
   103  
   104  func (p *parser) parseStatement() *Statement {
   105  	s := &Statement{}
   106  	tok := p.l.Peek()
   107  	s.Pos = tok.Pos
   108  	switch tok.Value {
   109  	case "pass":
   110  		s.Pass = true
   111  		p.l.Next()
   112  		p.next(EOL)
   113  	case "continue":
   114  		s.Continue = true
   115  		p.l.Next()
   116  		p.next(EOL)
   117  	case "def":
   118  		s.FuncDef = p.parseFuncDef()
   119  	case "for":
   120  		s.For = p.parseFor()
   121  	case "if":
   122  		s.If = p.parseIf()
   123  	case "return":
   124  		p.l.Next()
   125  		s.Return = p.parseReturn()
   126  	case "raise":
   127  		p.l.Next()
   128  		s.Raise = p.parseExpression()
   129  		p.next(EOL)
   130  	case "assert":
   131  		p.initField(&s.Assert)
   132  		p.l.Next()
   133  		s.Assert.Expr = p.parseExpression()
   134  		if p.optional(',') {
   135  			s.Assert.Message = p.next(String).Value
   136  		}
   137  		p.next(EOL)
   138  	default:
   139  		if tok.Type == Ident {
   140  			s.Ident = p.parseIdentStatement()
   141  		} else {
   142  			s.Literal = p.parseExpression()
   143  		}
   144  		p.next(EOL)
   145  	}
   146  	return s
   147  }
   148  
   149  func (p *parser) parseStatements() []*Statement {
   150  	stmts := []*Statement{}
   151  	for p.anythingBut(Unindent) {
   152  		stmts = append(stmts, p.parseStatement())
   153  	}
   154  	p.next(Unindent)
   155  	return stmts
   156  }
   157  
   158  func (p *parser) parseReturn() *ReturnStatement {
   159  	r := &ReturnStatement{}
   160  	for p.anythingBut(EOL) {
   161  		r.Values = append(r.Values, p.parseExpression())
   162  		if !p.optional(',') {
   163  			break
   164  		}
   165  	}
   166  	p.next(EOL)
   167  	return r
   168  }
   169  
   170  func (p *parser) parseFuncDef() *FuncDef {
   171  	p.nextv("def")
   172  	fd := &FuncDef{
   173  		Name: p.next(Ident).Value,
   174  	}
   175  	p.next('(')
   176  	for p.anythingBut(')') {
   177  		fd.Arguments = append(fd.Arguments, p.parseArgument())
   178  		if !p.optional(',') {
   179  			break
   180  		}
   181  	}
   182  	p.next(')')
   183  	p.next(':')
   184  	p.next(EOL)
   185  	if tok := p.l.Peek(); tok.Type == String {
   186  		fd.Docstring = tok.Value
   187  		p.l.Next()
   188  		p.next(EOL)
   189  	}
   190  	fd.Statements = p.parseStatements()
   191  	return fd
   192  }
   193  
   194  func (p *parser) parseArgument() Argument {
   195  	a := Argument{
   196  		Name: p.next(Ident).Value,
   197  	}
   198  	if tok := p.l.Peek(); tok.Type == ',' || tok.Type == ')' {
   199  		return a
   200  	}
   201  	tok := p.oneof(':', '&', '=')
   202  	if tok.Type == ':' {
   203  		// Type annotations
   204  		for {
   205  			tok = p.oneofval("bool", "str", "int", "list", "dict", "function")
   206  			a.Type = append(a.Type, tok.Value)
   207  			if !p.optional('|') {
   208  				break
   209  			}
   210  		}
   211  		if tok := p.l.Peek(); tok.Type == ',' || tok.Type == ')' {
   212  			return a
   213  		}
   214  		tok = p.oneof('&', '=')
   215  	}
   216  	if tok.Type == '&' {
   217  		// Argument aliases
   218  		for {
   219  			tok = p.next(Ident)
   220  			a.Aliases = append(a.Aliases, tok.Value)
   221  			if !p.optional('&') {
   222  				break
   223  			}
   224  		}
   225  		if tok := p.l.Peek(); tok.Type == ',' || tok.Type == ')' {
   226  			return a
   227  		}
   228  		tok = p.next('=')
   229  	}
   230  	// Default value
   231  	a.Value = p.parseExpression()
   232  	return a
   233  }
   234  
   235  func (p *parser) parseIf() *IfStatement {
   236  	p.nextv("if")
   237  	i := &IfStatement{}
   238  	p.parseExpressionInPlace(&i.Condition)
   239  	p.next(':')
   240  	p.next(EOL)
   241  	i.Statements = p.parseStatements()
   242  	for p.optionalv("elif") {
   243  		elif := &i.Elif[p.newElement(&i.Elif)]
   244  		p.parseExpressionInPlace(&elif.Condition)
   245  		p.next(':')
   246  		p.next(EOL)
   247  		elif.Statements = p.parseStatements()
   248  	}
   249  	if p.optionalv("else") {
   250  		p.next(':')
   251  		p.next(EOL)
   252  		i.ElseStatements = p.parseStatements()
   253  	}
   254  	return i
   255  }
   256  
   257  // newElement is a nasty little hack to allow extending slices of types that we can't readily name.
   258  // This is added in preference to having to break everything out to separately named types.
   259  func (p *parser) newElement(x interface{}) int {
   260  	v := reflect.ValueOf(x).Elem()
   261  	v.Set(reflect.Append(v, reflect.Zero(v.Type().Elem())))
   262  	return v.Len() - 1
   263  }
   264  
   265  // initField is a similar little hack for initialising non-slice fields.
   266  func (p *parser) initField(x interface{}) {
   267  	v := reflect.ValueOf(x).Elem()
   268  	v.Set(reflect.New(v.Type().Elem()))
   269  }
   270  
   271  func (p *parser) parseFor() *ForStatement {
   272  	f := &ForStatement{}
   273  	p.nextv("for")
   274  	f.Names = p.parseIdentList()
   275  	p.nextv("in")
   276  	p.parseExpressionInPlace(&f.Expr)
   277  	p.next(':')
   278  	p.next(EOL)
   279  	f.Statements = p.parseStatements()
   280  	return f
   281  }
   282  
   283  func (p *parser) parseIdentList() []string {
   284  	ret := []string{p.next(Ident).Value} // First one is compulsory
   285  	for tok := p.l.Peek(); tok.Type == ','; tok = p.l.Peek() {
   286  		p.l.Next()
   287  		ret = append(ret, p.next(Ident).Value)
   288  	}
   289  	return ret
   290  }
   291  
   292  func (p *parser) parseExpression() *Expression {
   293  	e := p.parseUnconditionalExpression()
   294  	p.parseInlineIf(e)
   295  	return e
   296  }
   297  
   298  func (p *parser) parseExpressionInPlace(e *Expression) {
   299  	e.Pos = p.l.Peek().Pos
   300  	p.parseUnconditionalExpressionInPlace(e)
   301  	p.parseInlineIf(e)
   302  }
   303  
   304  func (p *parser) parseInlineIf(e *Expression) {
   305  	if p.optionalv("if") {
   306  		e.If = &InlineIf{Condition: p.parseExpression()}
   307  		p.nextv("else")
   308  		e.If.Else = p.parseExpression()
   309  	}
   310  }
   311  
   312  func (p *parser) parseUnconditionalExpression() *Expression {
   313  	e := &Expression{Pos: p.l.Peek().Pos}
   314  	p.parseUnconditionalExpressionInPlace(e)
   315  	return e
   316  }
   317  
   318  func (p *parser) parseUnconditionalExpressionInPlace(e *Expression) {
   319  	if tok := p.l.Peek(); tok.Type == '-' || tok.Value == "not" {
   320  		p.l.Next()
   321  		e.UnaryOp = &UnaryOp{
   322  			Op:   tok.Value,
   323  			Expr: *p.parseValueExpression(),
   324  		}
   325  	} else {
   326  		e.Val = p.parseValueExpression()
   327  	}
   328  	tok := p.l.Peek()
   329  	if tok.Value == "not" {
   330  		// Hack for "not in" which needs an extra token.
   331  		p.l.Next()
   332  		tok = p.l.Peek()
   333  		p.assert(tok.Value == "in", tok, "expected 'in', not %s", tok.Value)
   334  		tok.Value = "not in"
   335  	}
   336  	if op, present := operators[tok.Value]; present {
   337  		p.l.Next()
   338  		o := &e.Op[p.newElement(&e.Op)]
   339  		o.Op = op
   340  		o.Expr = p.parseUnconditionalExpression()
   341  		if len(o.Expr.Op) > 0 {
   342  			if op := o.Expr.Op[0].Op; op == And || op == Or || op == Is {
   343  				// Hoist logical operator back up here to fix precedence. This is a bit of a hack and
   344  				// might not be perfect in all cases...
   345  				e.Op = append(e.Op, o.Expr.Op...)
   346  				o.Expr.Op = nil
   347  			}
   348  		}
   349  		tok = p.l.Peek()
   350  	}
   351  }
   352  
   353  func (p *parser) parseValueExpression() *ValueExpression {
   354  	ve := &ValueExpression{}
   355  	tok := p.l.Peek()
   356  	if tok.Type == String {
   357  		ve.String = tok.Value
   358  		p.l.Next()
   359  	} else if tok.Type == Int {
   360  		p.assert(len(tok.Value) < 19, tok, "int literal is too large: %s", tok)
   361  		p.initField(&ve.Int)
   362  		i, err := strconv.Atoi(tok.Value)
   363  		p.assert(err == nil, tok, "invalid int value %s", tok) // Theoretically the lexer shouldn't have fed us this...
   364  		ve.Int.Int = i
   365  		p.l.Next()
   366  	} else if tok.Value == "False" || tok.Value == "True" || tok.Value == "None" {
   367  		ve.Bool = tok.Value
   368  		p.l.Next()
   369  	} else if tok.Type == '[' {
   370  		ve.List = p.parseList('[', ']')
   371  	} else if tok.Type == '(' {
   372  		ve.Tuple = p.parseList('(', ')')
   373  	} else if tok.Type == '{' {
   374  		ve.Dict = p.parseDict()
   375  	} else if tok.Value == "lambda" {
   376  		ve.Lambda = p.parseLambda()
   377  	} else if tok.Type == Ident {
   378  		ve.Ident = p.parseIdentExpr()
   379  	} else {
   380  		p.fail(tok, "Unexpected token %s", tok)
   381  	}
   382  	tok = p.l.Peek()
   383  	if tok.Type == '[' {
   384  		ve.Slice = p.parseSlice()
   385  		tok = p.l.Peek()
   386  	}
   387  	if p.optional('.') {
   388  		ve.Property = p.parseIdentExpr()
   389  	} else if p.optional('(') {
   390  		ve.Call = p.parseCall()
   391  	}
   392  	return ve
   393  }
   394  
   395  func (p *parser) parseIdentStatement() *IdentStatement {
   396  	i := &IdentStatement{
   397  		Name: p.next(Ident).Value,
   398  	}
   399  	tok := p.l.Next()
   400  	switch tok.Type {
   401  	case ',':
   402  		p.initField(&i.Unpack)
   403  		i.Unpack.Names = p.parseIdentList()
   404  		p.next('=')
   405  		i.Unpack.Expr = p.parseExpression()
   406  	case '[':
   407  		p.initField(&i.Index)
   408  		i.Index.Expr = p.parseExpression()
   409  		p.next(']')
   410  		if tok := p.oneofval("=", "+="); tok.Type == '=' {
   411  			i.Index.Assign = p.parseExpression()
   412  		} else {
   413  			i.Index.AugAssign = p.parseExpression()
   414  		}
   415  	case '.':
   416  		p.initField(&i.Action)
   417  		i.Action.Property = p.parseIdentExpr()
   418  	case '(':
   419  		p.initField(&i.Action)
   420  		i.Action.Call = p.parseCall()
   421  	case '=':
   422  		p.initField(&i.Action)
   423  		i.Action.Assign = p.parseExpression()
   424  	default:
   425  		p.assert(tok.Value == "+=", tok, "Unexpected token %s, expected one of , [ . ( = +=", tok)
   426  		p.initField(&i.Action)
   427  		i.Action.AugAssign = p.parseExpression()
   428  	}
   429  	return i
   430  }
   431  
   432  func (p *parser) parseIdentExpr() *IdentExpr {
   433  	ie := &IdentExpr{Name: p.next(Ident).Value}
   434  	for tok := p.l.Peek(); tok.Type == '.' || tok.Type == '('; tok = p.l.Peek() {
   435  		p.l.Next()
   436  		action := &ie.Action[p.newElement(&ie.Action)]
   437  		if tok.Type == '.' {
   438  			action.Property = p.parseIdentExpr()
   439  		} else {
   440  			action.Call = p.parseCall()
   441  		}
   442  	}
   443  	return ie
   444  }
   445  
   446  func (p *parser) parseCall() *Call {
   447  	// The leading ( has already been consumed (because that fits better at the various call sites)
   448  	c := &Call{}
   449  	names := map[string]bool{}
   450  	for tok := p.l.Peek(); tok.Type != ')'; tok = p.l.Peek() {
   451  		arg := CallArgument{}
   452  		if tok.Type == Ident && p.l.AssignFollows() {
   453  			// Named argument.
   454  			arg.Name = tok.Value
   455  			p.next(Ident)
   456  			p.next('=')
   457  			p.assert(!names[arg.Name], tok, "Repeated argument %s", arg.Name)
   458  			names[arg.Name] = true
   459  		}
   460  		p.parseExpressionInPlace(&arg.Value)
   461  		c.Arguments = append(c.Arguments, arg)
   462  		if !p.optional(',') {
   463  			break
   464  		}
   465  	}
   466  	p.next(')')
   467  	return c
   468  }
   469  
   470  func (p *parser) parseList(opening, closing rune) *List {
   471  	l := &List{}
   472  	p.next(opening)
   473  	for tok := p.l.Peek(); tok.Type != closing; tok = p.l.Peek() {
   474  		l.Values = append(l.Values, p.parseExpression())
   475  		if !p.optional(',') {
   476  			break
   477  		}
   478  	}
   479  	if tok := p.l.Peek(); tok.Value == "for" {
   480  		p.assert(len(l.Values) == 1, tok, "Must have exactly 1 item in a list comprehension")
   481  		l.Comprehension = p.parseComprehension()
   482  	}
   483  	p.next(closing)
   484  	return l
   485  }
   486  
   487  func (p *parser) parseDict() *Dict {
   488  	d := &Dict{}
   489  	p.next('{')
   490  	for tok := p.l.Peek(); tok.Type != '}'; tok = p.l.Peek() {
   491  		di := &DictItem{}
   492  		p.parseExpressionInPlace(&di.Key)
   493  		p.next(':')
   494  		p.parseExpressionInPlace(&di.Value)
   495  		d.Items = append(d.Items, di)
   496  		if !p.optional(',') {
   497  			break
   498  		}
   499  	}
   500  	if tok := p.l.Peek(); tok.Value == "for" {
   501  		p.assert(len(d.Items) == 1, tok, "Must have exactly 1 key:value pair in a dict comprehension")
   502  		d.Comprehension = p.parseComprehension()
   503  	}
   504  	p.next('}')
   505  	return d
   506  }
   507  
   508  func (p *parser) parseSlice() *Slice {
   509  	s := &Slice{}
   510  	p.next('[')
   511  	if p.optional(':') {
   512  		s.Colon = ":"
   513  	} else if !p.optional(':') {
   514  		s.Start = p.parseExpression()
   515  		if p.optional(':') {
   516  			s.Colon = ":"
   517  		}
   518  	}
   519  	if p.optional(']') {
   520  		return s
   521  	}
   522  	s.End = p.parseExpression()
   523  	p.next(']')
   524  	return s
   525  }
   526  
   527  func (p *parser) parseComprehension() *Comprehension {
   528  	c := &Comprehension{}
   529  	p.nextv("for")
   530  	c.Names = p.parseIdentList()
   531  	p.nextv("in")
   532  	c.Expr = p.parseUnconditionalExpression()
   533  	if p.optionalv("for") {
   534  		p.initField(&c.Second)
   535  		c.Second.Names = p.parseIdentList()
   536  		p.nextv("in")
   537  		c.Second.Expr = p.parseUnconditionalExpression()
   538  	}
   539  	if p.optionalv("if") {
   540  		c.If = p.parseUnconditionalExpression()
   541  	}
   542  	return c
   543  }
   544  
   545  func (p *parser) parseLambda() *Lambda {
   546  	l := &Lambda{}
   547  	p.nextv("lambda")
   548  	for tok := p.l.Peek(); tok.Type == Ident; tok = p.l.Peek() {
   549  		p.l.Next()
   550  		arg := Argument{Name: tok.Value}
   551  		if p.optional('=') {
   552  			arg.Value = p.parseExpression()
   553  		}
   554  		l.Arguments = append(l.Arguments, arg)
   555  		if !p.optional(',') {
   556  			break
   557  		}
   558  	}
   559  	p.next(':')
   560  	p.parseExpressionInPlace(&l.Expr)
   561  	return l
   562  }