github.com/openconfig/goyang@v1.4.5/pkg/yang/parse.go (about)

     1  // Copyright 2015 Google Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package yang
    16  
// This file implements Parse, which parses the input as generic YANG and
// returns a slice of base Statements. Each Statement may in turn contain
// further Statements, so the result is a slice of Statement trees.
    20  
    21  import (
    22  	"bytes"
    23  	"errors"
    24  	"fmt"
    25  	"io"
    26  	"strings"
    27  )
    28  
    29  // a parser is used to parse the contents of a single .yang file.
    30  type parser struct {
    31  	lex    *lexer
    32  	errout *bytes.Buffer
    33  	tokens []*token // stack of pushed tokens (for backing up)
    34  
    35  	// Depth of statements in nested braces
    36  	statementDepth int
    37  
    38  	// hitBrace is returned when we encounter a '}'.  The statement location
    39  	// is updated with the location of the '}'.  The brace may be legitimate
    40  	// but only the caller will know if it is.  That is, the brace may be
    41  	// closing our parent or may be an error (we didn't expect it).
    42  	// hitBrace is updated with the file, line, and column of the brace's
    43  	// location.
    44  	hitBrace *Statement
    45  }
    46  
    47  // Statement is a generic YANG statement that may have sub-statements.
    48  // It implements the Node interface.
    49  //
    50  // Within the parser, it represents a non-terminal token.
    51  // From https://tools.ietf.org/html/rfc7950#section-6.3:
    52  // statement = keyword [argument] (";" / "{" *statement "}")
    53  // The argument is a string.
    54  type Statement struct {
    55  	Keyword     string
    56  	HasArgument bool
    57  	Argument    string
    58  	statements  []*Statement
    59  
    60  	file string
    61  	line int // 1's based line number
    62  	col  int // 1's based column number
    63  }
    64  
    65  func (s *Statement) NName() string         { return s.Argument }
    66  func (s *Statement) Kind() string          { return s.Keyword }
    67  func (s *Statement) Statement() *Statement { return s }
    68  func (s *Statement) ParentNode() Node      { return nil }
    69  func (s *Statement) Exts() []*Statement    { return nil }
    70  
    71  // Arg returns the optional argument to s.  It returns false if s has no
    72  // argument.
    73  func (s *Statement) Arg() (string, bool) { return s.Argument, s.HasArgument }
    74  
    75  // SubStatements returns a slice of Statements found in s.
    76  func (s *Statement) SubStatements() []*Statement { return s.statements }
    77  
    78  // Location returns the location in the source where s was defined.
    79  func (s *Statement) Location() string {
    80  	switch {
    81  	case s.file == "" && s.line == 0:
    82  		return "unknown"
    83  	case s.file == "":
    84  		return fmt.Sprintf("line %d:%d", s.line, s.col)
    85  	case s.line == 0:
    86  		return s.file
    87  	default:
    88  		return fmt.Sprintf("%s:%d:%d", s.file, s.line, s.col)
    89  	}
    90  }
    91  
    92  // Write writes the tree in s to w, each line indented by ident.  Children
    93  // nodes are indented further by a tab.  Typically indent is "" at the top
    94  // level.  Write is intended to display the contents of Statement, but
    95  // not necessarily reproduce the input of Statement.
    96  func (s *Statement) Write(w io.Writer, indent string) error {
    97  	if s.Keyword == "" {
    98  		// We are just a collection of statements at the top level.
    99  		for _, s := range s.statements {
   100  			if err := s.Write(w, indent); err != nil {
   101  				return err
   102  			}
   103  		}
   104  		return nil
   105  	}
   106  
   107  	parts := []string{fmt.Sprintf("%s%s", indent, s.Keyword)}
   108  	if s.HasArgument {
   109  		args := strings.Split(s.Argument, "\n")
   110  		if len(args) == 1 {
   111  			parts = append(parts, fmt.Sprintf(" %q", s.Argument))
   112  		} else {
   113  			parts = append(parts, ` "`, args[0], "\n")
   114  			i := fmt.Sprintf("%*s", len(s.Keyword)+1, "")
   115  			for x, p := range args[1:] {
   116  				s := fmt.Sprintf("%q", p)
   117  				s = s[1 : len(s)-1]
   118  				parts = append(parts, indent, " ", i, s)
   119  				if x == len(args[1:])-1 {
   120  					// last part just needs the closing "
   121  					parts = append(parts, `"`)
   122  				} else {
   123  					parts = append(parts, "\n")
   124  				}
   125  			}
   126  		}
   127  	}
   128  
   129  	if len(s.statements) == 0 {
   130  		_, err := fmt.Fprintf(w, "%s;\n", strings.Join(parts, ""))
   131  		return err
   132  	}
   133  	if _, err := fmt.Fprintf(w, "%s {\n", strings.Join(parts, "")); err != nil {
   134  		return err
   135  	}
   136  	for _, s := range s.statements {
   137  		if err := s.Write(w, indent+"\t"); err != nil {
   138  			return err
   139  		}
   140  	}
   141  	if _, err := fmt.Fprintf(w, "%s}\n", indent); err != nil {
   142  		return err
   143  	}
   144  	return nil
   145  }
   146  
   147  // ignoreMe is an error recovery token used by the parser in order
   148  // to continue processing for other errors in the file.
   149  var ignoreMe = &Statement{}
   150  
   151  // Parse parses the input as generic YANG and returns the statements parsed.
   152  // The path parameter should be the source name where input was read from (e.g.,
   153  // the file name the input was read from).  If one more more errors are
   154  // encountered, nil and an error are returned.  The error's text includes all
   155  // errors encountered.
   156  func Parse(input, path string) ([]*Statement, error) {
   157  	var statements []*Statement
   158  	p := &parser{
   159  		lex:      newLexer(input, path),
   160  		errout:   &bytes.Buffer{},
   161  		hitBrace: &Statement{},
   162  	}
   163  	p.lex.errout = p.errout
   164  Loop:
   165  	for {
   166  		switch ns := p.nextStatement(); ns {
   167  		case nil:
   168  			break Loop
   169  		case p.hitBrace:
   170  			fmt.Fprintf(p.errout, "%s:%d:%d: unexpected %c\n", ns.file, ns.line, ns.col, '}')
   171  		default:
   172  			statements = append(statements, ns)
   173  		}
   174  	}
   175  
   176  	p.checkStatementDepthIsZero()
   177  
   178  	if p.errout.Len() == 0 {
   179  		return statements, nil
   180  	}
   181  	return nil, errors.New(strings.TrimSpace(p.errout.String()))
   182  }
   183  
   184  // push pushes tokens t back on the input stream so they will be the next
   185  // tokens returned by next.  The tokens list is a LIFO so the final token
   186  // listed to push will be the next token returned.
   187  func (p *parser) push(t ...*token) {
   188  	p.tokens = append(p.tokens, t...)
   189  }
   190  
   191  // pop returns the last token pushed, or nil if the token stack is empty.
   192  func (p *parser) pop() *token {
   193  	if n := len(p.tokens); n > 0 {
   194  		n--
   195  		defer func() { p.tokens = p.tokens[:n] }()
   196  		return p.tokens[n]
   197  	}
   198  	return nil
   199  }
   200  
   201  // next returns the next token from the lexer. If the next token is a
   202  // concatenated string, it returns the concatenated string as the token.
   203  func (p *parser) next() *token {
   204  	if t := p.pop(); t != nil {
   205  		return t
   206  	}
   207  	// next returns the next unprocessed lexer token.
   208  	next := func() *token {
   209  		for {
   210  			if t := p.lex.NextToken(); t.Code() != tError {
   211  				return t
   212  			}
   213  		}
   214  	}
   215  	t := next()
   216  	if t.Code() != tString {
   217  		return t
   218  	}
   219  	// Process string concatenation (both single and double quote).
   220  	// See https://tools.ietf.org/html/rfc7950#section-6.1.3.1
   221  	// The lexer trimmed the quotes already.
   222  	for {
   223  		nt := next()
   224  		switch nt.Code() {
   225  		case tEOF:
   226  			return t
   227  		case tUnquoted:
   228  			if nt.Text != "+" {
   229  				p.push(nt)
   230  				return t
   231  			}
   232  		default:
   233  			p.push(nt)
   234  			return t
   235  		}
   236  		// Invariant: nt is a + sign.
   237  		nnt := next()
   238  		switch nnt.Code() {
   239  		case tEOF:
   240  			p.push(nt)
   241  			return t
   242  		case tString:
   243  			// Accumulate the concatenation.
   244  			t.Text += nnt.Text
   245  		default:
   246  			p.push(nnt, nt)
   247  			return t
   248  		}
   249  	}
   250  }
   251  
   252  // nextStatement returns the next statement in the input, which may in turn
   253  // recurse to read sub statements.
   254  // nil is returned when EOF has been reached, or is reached halfway through
   255  // parsing the next statement (with associated syntax errors printed to
   256  // errout).
   257  func (p *parser) nextStatement() *Statement {
   258  	t := p.next()
   259  	switch t.Code() {
   260  	case tEOF:
   261  		return nil
   262  	case '}':
   263  		p.statementDepth -= 1
   264  		p.hitBrace.file = t.File
   265  		p.hitBrace.line = t.Line
   266  		p.hitBrace.col = t.Col
   267  		return p.hitBrace
   268  	case tUnquoted:
   269  	default:
   270  		fmt.Fprintf(p.errout, "%v: keyword token not an unquoted string\n", t)
   271  		return ignoreMe
   272  	}
   273  	// Invariant: t represents a keyword token.
   274  
   275  	s := &Statement{
   276  		Keyword: t.Text,
   277  		file:    t.File,
   278  		line:    t.Line,
   279  		col:     t.Col,
   280  	}
   281  
   282  	// The keyword "pattern" must be treated specially. When
   283  	// parsing the argument for "pattern", escape sequences
   284  	// must be expanded differently.
   285  	p.lex.inPattern = t.Text == "pattern"
   286  	t = p.next()
   287  	p.lex.inPattern = false
   288  	switch t.Code() {
   289  	case tString, tUnquoted:
   290  		s.HasArgument = true
   291  		s.Argument = t.Text
   292  		t = p.next()
   293  	}
   294  
   295  	switch t.Code() {
   296  	case tEOF:
   297  		fmt.Fprintf(p.errout, "%s: unexpected EOF\n", s.file)
   298  		return nil
   299  	case ';':
   300  		return s
   301  	case '{':
   302  		p.statementDepth += 1
   303  		for {
   304  			switch ns := p.nextStatement(); ns {
   305  			case nil:
   306  				// Signal EOF reached.
   307  				return nil
   308  			case p.hitBrace:
   309  				return s
   310  			default:
   311  				s.statements = append(s.statements, ns)
   312  			}
   313  		}
   314  	default:
   315  		fmt.Fprintf(p.errout, "%v: syntax error, expected ';' or '{'\n", t)
   316  		return ignoreMe
   317  	}
   318  }
   319  
   320  // checkStatementDepthIsZero checks that we aren't missing closing
   321  // braces. Note: the parser will error out for the case where we
   322  // start with an unmatched close brace, i.e. depth < 0
   323  //
   324  // This test should only be done if there are no other errors as
   325  // we may exit early due to those errors -- and therefore there *might*
   326  // not really be a mismatched brace issue.
   327  func (p *parser) checkStatementDepthIsZero() {
   328  	if p.errout.Len() > 0 || p.statementDepth == 0 {
   329  		return
   330  	}
   331  
   332  	plural := ""
   333  	if p.statementDepth > 1 {
   334  		plural = "s"
   335  	}
   336  	fmt.Fprintf(p.errout, "%s:%d:%d: missing %d closing brace%s\n",
   337  		p.lex.file, p.lex.line, p.lex.col, p.statementDepth, plural)
   338  }