golang.org/x/exp@v0.0.0-20240506185415-9bf2ced13842/ebnf/parser.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package ebnf 6 7 import ( 8 "io" 9 "strconv" 10 "text/scanner" 11 ) 12 13 type parser struct { 14 errors errorList 15 scanner scanner.Scanner 16 pos scanner.Position // token position 17 tok rune // one token look-ahead 18 lit string // token literal 19 } 20 21 func (p *parser) next() { 22 p.tok = p.scanner.Scan() 23 p.pos = p.scanner.Position 24 p.lit = p.scanner.TokenText() 25 } 26 27 func (p *parser) error(pos scanner.Position, msg string) { 28 p.errors = append(p.errors, newError(pos, msg)) 29 } 30 31 func (p *parser) errorExpected(pos scanner.Position, msg string) { 32 msg = `expected "` + msg + `"` 33 if pos.Offset == p.pos.Offset { 34 // the error happened at the current position; 35 // make the error message more specific 36 msg += ", found " + scanner.TokenString(p.tok) 37 if p.tok < 0 { 38 msg += " " + p.lit 39 } 40 } 41 p.error(pos, msg) 42 } 43 44 func (p *parser) expect(tok rune) scanner.Position { 45 pos := p.pos 46 if p.tok != tok { 47 p.errorExpected(pos, scanner.TokenString(tok)) 48 } 49 p.next() // make progress in any case 50 return pos 51 } 52 53 func (p *parser) parseIdentifier() *Name { 54 pos := p.pos 55 name := p.lit 56 p.expect(scanner.Ident) 57 return &Name{pos, name} 58 } 59 60 func (p *parser) parseToken() *Token { 61 pos := p.pos 62 value := "" 63 if p.tok == scanner.String || p.tok == scanner.RawString { 64 value, _ = strconv.Unquote(p.lit) 65 // Unquote may fail with an error, but only if the scanner found 66 // an illegal string in the first place. In this case the error 67 // has already been reported. 68 p.next() 69 } else { 70 p.expect(scanner.String) 71 } 72 return &Token{pos, value} 73 } 74 75 // parseTerm returns nil if no term was found. 76 func (p *parser) parseTerm() (x Expression) { 77 pos := p.pos 78 79 switch p.tok { 80 case scanner.Ident: 81 x = p.parseIdentifier() 82 83 case scanner.String, scanner.RawString: 84 tok := p.parseToken() 85 x = tok 86 const ellipsis = '…' // U+2026, the horizontal ellipsis character 87 if p.tok == ellipsis { 88 p.next() 89 x = &Range{tok, p.parseToken()} 90 } 91 92 case '(': 93 p.next() 94 x = &Group{pos, p.parseExpression()} 95 p.expect(')') 96 97 case '[': 98 p.next() 99 x = &Option{pos, p.parseExpression()} 100 p.expect(']') 101 102 case '{': 103 p.next() 104 x = &Repetition{pos, p.parseExpression()} 105 p.expect('}') 106 } 107 108 return x 109 } 110 111 func (p *parser) parseSequence() Expression { 112 var list Sequence 113 114 for x := p.parseTerm(); x != nil; x = p.parseTerm() { 115 list = append(list, x) 116 } 117 118 // no need for a sequence if list.Len() < 2 119 switch len(list) { 120 case 0: 121 p.errorExpected(p.pos, "term") 122 return &Bad{p.pos, "term expected"} 123 case 1: 124 return list[0] 125 } 126 127 return list 128 } 129 130 func (p *parser) parseExpression() Expression { 131 var list Alternative 132 133 for { 134 list = append(list, p.parseSequence()) 135 if p.tok != '|' { 136 break 137 } 138 p.next() 139 } 140 // len(list) > 0 141 142 // no need for an Alternative node if list.Len() < 2 143 if len(list) == 1 { 144 return list[0] 145 } 146 147 return list 148 } 149 150 func (p *parser) parseProduction() *Production { 151 name := p.parseIdentifier() 152 p.expect('=') 153 var expr Expression 154 if p.tok != '.' { 155 expr = p.parseExpression() 156 } 157 p.expect('.') 158 return &Production{name, expr} 159 } 160 161 func (p *parser) parse(filename string, src io.Reader) Grammar { 162 p.scanner.Init(src) 163 p.scanner.Filename = filename 164 p.next() // initializes pos, tok, lit 165 166 grammar := make(Grammar) 167 for p.tok != scanner.EOF { 168 prod := p.parseProduction() 169 name := prod.Name.String 170 if _, found := grammar[name]; !found { 171 grammar[name] = prod 172 } else { 173 p.error(prod.Pos(), name+" declared already") 174 } 175 } 176 177 return grammar 178 } 179 180 // Parse parses a set of EBNF productions from source src. 181 // It returns a set of productions. Errors are reported 182 // for incorrect syntax and if a production is declared 183 // more than once; the filename is used only for error 184 // positions. 185 func Parse(filename string, src io.Reader) (Grammar, error) { 186 var p parser 187 grammar := p.parse(filename, src) 188 return grammar, p.errors.Err() 189 }