golang.org/x/exp@v0.0.0-20240506185415-9bf2ced13842/ebnf/parser.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ebnf
     6  
     7  import (
     8  	"io"
     9  	"strconv"
    10  	"text/scanner"
    11  )
    12  
    13  type parser struct {
    14  	errors  errorList
    15  	scanner scanner.Scanner
    16  	pos     scanner.Position // token position
    17  	tok     rune             // one token look-ahead
    18  	lit     string           // token literal
    19  }
    20  
    21  func (p *parser) next() {
    22  	p.tok = p.scanner.Scan()
    23  	p.pos = p.scanner.Position
    24  	p.lit = p.scanner.TokenText()
    25  }
    26  
    27  func (p *parser) error(pos scanner.Position, msg string) {
    28  	p.errors = append(p.errors, newError(pos, msg))
    29  }
    30  
    31  func (p *parser) errorExpected(pos scanner.Position, msg string) {
    32  	msg = `expected "` + msg + `"`
    33  	if pos.Offset == p.pos.Offset {
    34  		// the error happened at the current position;
    35  		// make the error message more specific
    36  		msg += ", found " + scanner.TokenString(p.tok)
    37  		if p.tok < 0 {
    38  			msg += " " + p.lit
    39  		}
    40  	}
    41  	p.error(pos, msg)
    42  }
    43  
    44  func (p *parser) expect(tok rune) scanner.Position {
    45  	pos := p.pos
    46  	if p.tok != tok {
    47  		p.errorExpected(pos, scanner.TokenString(tok))
    48  	}
    49  	p.next() // make progress in any case
    50  	return pos
    51  }
    52  
    53  func (p *parser) parseIdentifier() *Name {
    54  	pos := p.pos
    55  	name := p.lit
    56  	p.expect(scanner.Ident)
    57  	return &Name{pos, name}
    58  }
    59  
    60  func (p *parser) parseToken() *Token {
    61  	pos := p.pos
    62  	value := ""
    63  	if p.tok == scanner.String || p.tok == scanner.RawString {
    64  		value, _ = strconv.Unquote(p.lit)
    65  		// Unquote may fail with an error, but only if the scanner found
    66  		// an illegal string in the first place. In this case the error
    67  		// has already been reported.
    68  		p.next()
    69  	} else {
    70  		p.expect(scanner.String)
    71  	}
    72  	return &Token{pos, value}
    73  }
    74  
    75  // parseTerm returns nil if no term was found.
    76  func (p *parser) parseTerm() (x Expression) {
    77  	pos := p.pos
    78  
    79  	switch p.tok {
    80  	case scanner.Ident:
    81  		x = p.parseIdentifier()
    82  
    83  	case scanner.String, scanner.RawString:
    84  		tok := p.parseToken()
    85  		x = tok
    86  		const ellipsis = '…' // U+2026, the horizontal ellipsis character
    87  		if p.tok == ellipsis {
    88  			p.next()
    89  			x = &Range{tok, p.parseToken()}
    90  		}
    91  
    92  	case '(':
    93  		p.next()
    94  		x = &Group{pos, p.parseExpression()}
    95  		p.expect(')')
    96  
    97  	case '[':
    98  		p.next()
    99  		x = &Option{pos, p.parseExpression()}
   100  		p.expect(']')
   101  
   102  	case '{':
   103  		p.next()
   104  		x = &Repetition{pos, p.parseExpression()}
   105  		p.expect('}')
   106  	}
   107  
   108  	return x
   109  }
   110  
   111  func (p *parser) parseSequence() Expression {
   112  	var list Sequence
   113  
   114  	for x := p.parseTerm(); x != nil; x = p.parseTerm() {
   115  		list = append(list, x)
   116  	}
   117  
   118  	// no need for a sequence if list.Len() < 2
   119  	switch len(list) {
   120  	case 0:
   121  		p.errorExpected(p.pos, "term")
   122  		return &Bad{p.pos, "term expected"}
   123  	case 1:
   124  		return list[0]
   125  	}
   126  
   127  	return list
   128  }
   129  
   130  func (p *parser) parseExpression() Expression {
   131  	var list Alternative
   132  
   133  	for {
   134  		list = append(list, p.parseSequence())
   135  		if p.tok != '|' {
   136  			break
   137  		}
   138  		p.next()
   139  	}
   140  	// len(list) > 0
   141  
   142  	// no need for an Alternative node if list.Len() < 2
   143  	if len(list) == 1 {
   144  		return list[0]
   145  	}
   146  
   147  	return list
   148  }
   149  
   150  func (p *parser) parseProduction() *Production {
   151  	name := p.parseIdentifier()
   152  	p.expect('=')
   153  	var expr Expression
   154  	if p.tok != '.' {
   155  		expr = p.parseExpression()
   156  	}
   157  	p.expect('.')
   158  	return &Production{name, expr}
   159  }
   160  
   161  func (p *parser) parse(filename string, src io.Reader) Grammar {
   162  	p.scanner.Init(src)
   163  	p.scanner.Filename = filename
   164  	p.next() // initializes pos, tok, lit
   165  
   166  	grammar := make(Grammar)
   167  	for p.tok != scanner.EOF {
   168  		prod := p.parseProduction()
   169  		name := prod.Name.String
   170  		if _, found := grammar[name]; !found {
   171  			grammar[name] = prod
   172  		} else {
   173  			p.error(prod.Pos(), name+" declared already")
   174  		}
   175  	}
   176  
   177  	return grammar
   178  }
   179  
   180  // Parse parses a set of EBNF productions from source src.
   181  // It returns a set of productions. Errors are reported
   182  // for incorrect syntax and if a production is declared
   183  // more than once; the filename is used only for error
   184  // positions.
   185  func Parse(filename string, src io.Reader) (Grammar, error) {
   186  	var p parser
   187  	grammar := p.parse(filename, src)
   188  	return grammar, p.errors.Err()
   189  }