golang.org/x/exp@v0.0.0-20240506185415-9bf2ced13842/ebnf/ebnf.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package ebnf is a library for EBNF grammars. The input is text ([]byte) 6 // satisfying the following grammar (represented itself in EBNF): 7 // 8 // Production = name "=" [ Expression ] "." . 9 // Expression = Alternative { "|" Alternative } . 10 // Alternative = Term { Term } . 11 // Term = name | token [ "…" token ] | Group | Option | Repetition . 12 // Group = "(" Expression ")" . 13 // Option = "[" Expression "]" . 14 // Repetition = "{" Expression "}" . 15 // 16 // A name is a Go identifier, a token is a Go string, and comments 17 // and white space follow the same rules as for the Go language. 18 // Production names starting with an uppercase Unicode letter denote 19 // non-terminal productions (i.e., productions which allow white-space 20 // and comments between tokens); all other production names denote 21 // lexical productions. 22 package ebnf // import "golang.org/x/exp/ebnf" 23 24 import ( 25 "errors" 26 "fmt" 27 "text/scanner" 28 "unicode" 29 "unicode/utf8" 30 ) 31 32 // ---------------------------------------------------------------------------- 33 // Error handling 34 35 type errorList []error 36 37 func (list errorList) Err() error { 38 if len(list) == 0 { 39 return nil 40 } 41 return list 42 } 43 44 func (list errorList) Error() string { 45 switch len(list) { 46 case 0: 47 return "no errors" 48 case 1: 49 return list[0].Error() 50 } 51 return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1) 52 } 53 54 func newError(pos scanner.Position, msg string) error { 55 return errors.New(fmt.Sprintf("%s: %s", pos, msg)) 56 } 57 58 // ---------------------------------------------------------------------------- 59 // Internal representation 60 61 type ( 62 // An Expression node represents a production expression. 63 Expression interface { 64 // Pos is the position of the first character of the syntactic construct 65 Pos() scanner.Position 66 } 67 68 // An Alternative node represents a non-empty list of alternative expressions. 69 Alternative []Expression // x | y | z 70 71 // A Sequence node represents a non-empty list of sequential expressions. 72 Sequence []Expression // x y z 73 74 // A Name node represents a production name. 75 Name struct { 76 StringPos scanner.Position 77 String string 78 } 79 80 // A Token node represents a literal. 81 Token struct { 82 StringPos scanner.Position 83 String string 84 } 85 86 // A List node represents a range of characters. 87 Range struct { 88 Begin, End *Token // begin ... end 89 } 90 91 // A Group node represents a grouped expression. 92 Group struct { 93 Lparen scanner.Position 94 Body Expression // (body) 95 } 96 97 // An Option node represents an optional expression. 98 Option struct { 99 Lbrack scanner.Position 100 Body Expression // [body] 101 } 102 103 // A Repetition node represents a repeated expression. 104 Repetition struct { 105 Lbrace scanner.Position 106 Body Expression // {body} 107 } 108 109 // A Production node represents an EBNF production. 110 Production struct { 111 Name *Name 112 Expr Expression 113 } 114 115 // A Bad node stands for pieces of source code that lead to a parse error. 116 Bad struct { 117 TokPos scanner.Position 118 Error string // parser error message 119 } 120 121 // A Grammar is a set of EBNF productions. The map 122 // is indexed by production name. 123 // 124 Grammar map[string]*Production 125 ) 126 127 func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative 128 func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences 129 func (x *Name) Pos() scanner.Position { return x.StringPos } 130 func (x *Token) Pos() scanner.Position { return x.StringPos } 131 func (x *Range) Pos() scanner.Position { return x.Begin.Pos() } 132 func (x *Group) Pos() scanner.Position { return x.Lparen } 133 func (x *Option) Pos() scanner.Position { return x.Lbrack } 134 func (x *Repetition) Pos() scanner.Position { return x.Lbrace } 135 func (x *Production) Pos() scanner.Position { return x.Name.Pos() } 136 func (x *Bad) Pos() scanner.Position { return x.TokPos } 137 138 // ---------------------------------------------------------------------------- 139 // Grammar verification 140 141 func isLexical(name string) bool { 142 ch, _ := utf8.DecodeRuneInString(name) 143 return !unicode.IsUpper(ch) 144 } 145 146 type verifier struct { 147 errors errorList 148 worklist []*Production 149 reached Grammar // set of productions reached from (and including) the root production 150 grammar Grammar 151 } 152 153 func (v *verifier) error(pos scanner.Position, msg string) { 154 v.errors = append(v.errors, newError(pos, msg)) 155 } 156 157 func (v *verifier) push(prod *Production) { 158 name := prod.Name.String 159 if _, found := v.reached[name]; !found { 160 v.worklist = append(v.worklist, prod) 161 v.reached[name] = prod 162 } 163 } 164 165 func (v *verifier) verifyChar(x *Token) rune { 166 s := x.String 167 if utf8.RuneCountInString(s) != 1 { 168 v.error(x.Pos(), "single char expected, found "+s) 169 return 0 170 } 171 ch, _ := utf8.DecodeRuneInString(s) 172 return ch 173 } 174 175 func (v *verifier) verifyExpr(expr Expression, lexical bool) { 176 switch x := expr.(type) { 177 case nil: 178 // empty expression 179 case Alternative: 180 for _, e := range x { 181 v.verifyExpr(e, lexical) 182 } 183 case Sequence: 184 for _, e := range x { 185 v.verifyExpr(e, lexical) 186 } 187 case *Name: 188 // a production with this name must exist; 189 // add it to the worklist if not yet processed 190 if prod, found := v.grammar[x.String]; found { 191 v.push(prod) 192 } else { 193 v.error(x.Pos(), "missing production "+x.String) 194 } 195 // within a lexical production references 196 // to non-lexical productions are invalid 197 if lexical && !isLexical(x.String) { 198 v.error(x.Pos(), "reference to non-lexical production "+x.String) 199 } 200 case *Token: 201 // nothing to do for now 202 case *Range: 203 i := v.verifyChar(x.Begin) 204 j := v.verifyChar(x.End) 205 if i >= j { 206 v.error(x.Pos(), "decreasing character range") 207 } 208 case *Group: 209 v.verifyExpr(x.Body, lexical) 210 case *Option: 211 v.verifyExpr(x.Body, lexical) 212 case *Repetition: 213 v.verifyExpr(x.Body, lexical) 214 case *Bad: 215 v.error(x.Pos(), x.Error) 216 default: 217 panic(fmt.Sprintf("internal error: unexpected type %T", expr)) 218 } 219 } 220 221 func (v *verifier) verify(grammar Grammar, start string) { 222 // find root production 223 root, found := grammar[start] 224 if !found { 225 var noPos scanner.Position 226 v.error(noPos, "no start production "+start) 227 return 228 } 229 230 // initialize verifier 231 v.worklist = v.worklist[0:0] 232 v.reached = make(Grammar) 233 v.grammar = grammar 234 235 // work through the worklist 236 v.push(root) 237 for { 238 n := len(v.worklist) - 1 239 if n < 0 { 240 break 241 } 242 prod := v.worklist[n] 243 v.worklist = v.worklist[0:n] 244 v.verifyExpr(prod.Expr, isLexical(prod.Name.String)) 245 } 246 247 // check if all productions were reached 248 if len(v.reached) < len(v.grammar) { 249 for name, prod := range v.grammar { 250 if _, found := v.reached[name]; !found { 251 v.error(prod.Pos(), name+" is unreachable") 252 } 253 } 254 } 255 } 256 257 // Verify checks that: 258 // - all productions used are defined 259 // - all productions defined are used when beginning at start 260 // - lexical productions refer only to other lexical productions 261 // 262 // Position information is interpreted relative to the file set fset. 263 func Verify(grammar Grammar, start string) error { 264 var v verifier 265 v.verify(grammar, start) 266 return v.errors.Err() 267 }