github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/exp/ebnf/ebnf.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package ebnf is a library for EBNF grammars. The input is text ([]byte)
     6  // satisfying the following grammar (represented itself in EBNF):
     7  //
     8  //	Production  = name "=" [ Expression ] "." .
     9  //	Expression  = Alternative { "|" Alternative } .
    10  //	Alternative = Term { Term } .
    11  //	Term        = name | token [ "…" token ] | Group | Option | Repetition .
    12  //	Group       = "(" Expression ")" .
    13  //	Option      = "[" Expression "]" .
    14  //	Repetition  = "{" Expression "}" .
    15  //
    16  // A name is a Go identifier, a token is a Go string, and comments
    17  // and white space follow the same rules as for the Go language.
    18  // Production names starting with an uppercase Unicode letter denote
    19  // non-terminal productions (i.e., productions which allow white-space
    20  // and comments between tokens); all other production names denote
    21  // lexical productions.
    22  //
    23  package ebnf // import "golang.org/x/exp/ebnf"
    24  
import (
	"errors"
	"fmt"
	"sort"
	"text/scanner"
	"unicode"
	"unicode/utf8"
)
    32  
    33  // ----------------------------------------------------------------------------
    34  // Error handling
    35  
    36  type errorList []error
    37  
    38  func (list errorList) Err() error {
    39  	if len(list) == 0 {
    40  		return nil
    41  	}
    42  	return list
    43  }
    44  
    45  func (list errorList) Error() string {
    46  	switch len(list) {
    47  	case 0:
    48  		return "no errors"
    49  	case 1:
    50  		return list[0].Error()
    51  	}
    52  	return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
    53  }
    54  
    55  func newError(pos scanner.Position, msg string) error {
    56  	return errors.New(fmt.Sprintf("%s: %s", pos, msg))
    57  }
    58  
    59  // ----------------------------------------------------------------------------
    60  // Internal representation
    61  
    62  type (
    63  	// An Expression node represents a production expression.
    64  	Expression interface {
    65  		// Pos is the position of the first character of the syntactic construct
    66  		Pos() scanner.Position
    67  	}
    68  
    69  	// An Alternative node represents a non-empty list of alternative expressions.
    70  	Alternative []Expression // x | y | z
    71  
    72  	// A Sequence node represents a non-empty list of sequential expressions.
    73  	Sequence []Expression // x y z
    74  
    75  	// A Name node represents a production name.
    76  	Name struct {
    77  		StringPos scanner.Position
    78  		String    string
    79  	}
    80  
    81  	// A Token node represents a literal.
    82  	Token struct {
    83  		StringPos scanner.Position
    84  		String    string
    85  	}
    86  
    87  	// A List node represents a range of characters.
    88  	Range struct {
    89  		Begin, End *Token // begin ... end
    90  	}
    91  
    92  	// A Group node represents a grouped expression.
    93  	Group struct {
    94  		Lparen scanner.Position
    95  		Body   Expression // (body)
    96  	}
    97  
    98  	// An Option node represents an optional expression.
    99  	Option struct {
   100  		Lbrack scanner.Position
   101  		Body   Expression // [body]
   102  	}
   103  
   104  	// A Repetition node represents a repeated expression.
   105  	Repetition struct {
   106  		Lbrace scanner.Position
   107  		Body   Expression // {body}
   108  	}
   109  
   110  	// A Production node represents an EBNF production.
   111  	Production struct {
   112  		Name *Name
   113  		Expr Expression
   114  	}
   115  
   116  	// A Bad node stands for pieces of source code that lead to a parse error.
   117  	Bad struct {
   118  		TokPos scanner.Position
   119  		Error  string // parser error message
   120  	}
   121  
   122  	// A Grammar is a set of EBNF productions. The map
   123  	// is indexed by production name.
   124  	//
   125  	Grammar map[string]*Production
   126  )
   127  
   128  func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
   129  func (x Sequence) Pos() scanner.Position    { return x[0].Pos() } // the parser always generates non-empty Sequences
   130  func (x *Name) Pos() scanner.Position       { return x.StringPos }
   131  func (x *Token) Pos() scanner.Position      { return x.StringPos }
   132  func (x *Range) Pos() scanner.Position      { return x.Begin.Pos() }
   133  func (x *Group) Pos() scanner.Position      { return x.Lparen }
   134  func (x *Option) Pos() scanner.Position     { return x.Lbrack }
   135  func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
   136  func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
   137  func (x *Bad) Pos() scanner.Position        { return x.TokPos }
   138  
   139  // ----------------------------------------------------------------------------
   140  // Grammar verification
   141  
   142  func isLexical(name string) bool {
   143  	ch, _ := utf8.DecodeRuneInString(name)
   144  	return !unicode.IsUpper(ch)
   145  }
   146  
   147  type verifier struct {
   148  	errors   errorList
   149  	worklist []*Production
   150  	reached  Grammar // set of productions reached from (and including) the root production
   151  	grammar  Grammar
   152  }
   153  
   154  func (v *verifier) error(pos scanner.Position, msg string) {
   155  	v.errors = append(v.errors, newError(pos, msg))
   156  }
   157  
   158  func (v *verifier) push(prod *Production) {
   159  	name := prod.Name.String
   160  	if _, found := v.reached[name]; !found {
   161  		v.worklist = append(v.worklist, prod)
   162  		v.reached[name] = prod
   163  	}
   164  }
   165  
   166  func (v *verifier) verifyChar(x *Token) rune {
   167  	s := x.String
   168  	if utf8.RuneCountInString(s) != 1 {
   169  		v.error(x.Pos(), "single char expected, found "+s)
   170  		return 0
   171  	}
   172  	ch, _ := utf8.DecodeRuneInString(s)
   173  	return ch
   174  }
   175  
   176  func (v *verifier) verifyExpr(expr Expression, lexical bool) {
   177  	switch x := expr.(type) {
   178  	case nil:
   179  		// empty expression
   180  	case Alternative:
   181  		for _, e := range x {
   182  			v.verifyExpr(e, lexical)
   183  		}
   184  	case Sequence:
   185  		for _, e := range x {
   186  			v.verifyExpr(e, lexical)
   187  		}
   188  	case *Name:
   189  		// a production with this name must exist;
   190  		// add it to the worklist if not yet processed
   191  		if prod, found := v.grammar[x.String]; found {
   192  			v.push(prod)
   193  		} else {
   194  			v.error(x.Pos(), "missing production "+x.String)
   195  		}
   196  		// within a lexical production references
   197  		// to non-lexical productions are invalid
   198  		if lexical && !isLexical(x.String) {
   199  			v.error(x.Pos(), "reference to non-lexical production "+x.String)
   200  		}
   201  	case *Token:
   202  		// nothing to do for now
   203  	case *Range:
   204  		i := v.verifyChar(x.Begin)
   205  		j := v.verifyChar(x.End)
   206  		if i >= j {
   207  			v.error(x.Pos(), "decreasing character range")
   208  		}
   209  	case *Group:
   210  		v.verifyExpr(x.Body, lexical)
   211  	case *Option:
   212  		v.verifyExpr(x.Body, lexical)
   213  	case *Repetition:
   214  		v.verifyExpr(x.Body, lexical)
   215  	case *Bad:
   216  		v.error(x.Pos(), x.Error)
   217  	default:
   218  		panic(fmt.Sprintf("internal error: unexpected type %T", expr))
   219  	}
   220  }
   221  
   222  func (v *verifier) verify(grammar Grammar, start string) {
   223  	// find root production
   224  	root, found := grammar[start]
   225  	if !found {
   226  		var noPos scanner.Position
   227  		v.error(noPos, "no start production "+start)
   228  		return
   229  	}
   230  
   231  	// initialize verifier
   232  	v.worklist = v.worklist[0:0]
   233  	v.reached = make(Grammar)
   234  	v.grammar = grammar
   235  
   236  	// work through the worklist
   237  	v.push(root)
   238  	for {
   239  		n := len(v.worklist) - 1
   240  		if n < 0 {
   241  			break
   242  		}
   243  		prod := v.worklist[n]
   244  		v.worklist = v.worklist[0:n]
   245  		v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
   246  	}
   247  
   248  	// check if all productions were reached
   249  	if len(v.reached) < len(v.grammar) {
   250  		for name, prod := range v.grammar {
   251  			if _, found := v.reached[name]; !found {
   252  				v.error(prod.Pos(), name+" is unreachable")
   253  			}
   254  		}
   255  	}
   256  }
   257  
   258  // Verify checks that:
   259  //	- all productions used are defined
   260  //	- all productions defined are used when beginning at start
   261  //	- lexical productions refer only to other lexical productions
   262  //
   263  // Position information is interpreted relative to the file set fset.
   264  //
   265  func Verify(grammar Grammar, start string) error {
   266  	var v verifier
   267  	v.verify(grammar, start)
   268  	return v.errors.Err()
   269  }