// Copyright 2015 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package yang

// This file implements Parse, which parses the input as generic YANG and
// returns a slice of base Statements (which in turn may contain more
// Statements, i.e., a slice of Statement trees.)

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"strings"
)

// a parser is used to parse the contents of a single .yang file.
type parser struct {
	lex    *lexer        // tokenizer over the single .yang input being parsed
	errout *bytes.Buffer // accumulates every error message produced during the parse
	tokens []*token      // stack of pushed tokens (for backing up)

	// Depth of statements in nested braces
	statementDepth int

	// hitBrace is returned when we encounter a '}'.  The statement location
	// is updated with the location of the '}'.  The brace may be legitimate
	// but only the caller will know if it is.  That is, the brace may be
	// closing our parent or may be an error (we didn't expect it).
	// hitBrace is updated with the file, line, and column of the brace's
	// location.
	hitBrace *Statement
}

// Statement is a generic YANG statement that may have sub-statements.
// It implements the Node interface.
//
// Within the parser, it represents a non-terminal token.
// From https://tools.ietf.org/html/rfc7950#section-6.3:
// statement = keyword [argument] (";" / "{" *statement "}")
// The argument is a string.
type Statement struct {
	Keyword     string       // the statement's keyword (e.g. "leaf", "pattern")
	HasArgument bool         // true if an argument token followed the keyword
	Argument    string       // the argument, valid only when HasArgument is true
	statements  []*Statement // sub-statements enclosed in { }, in source order

	file string // source file the statement was read from
	line int    // 1's based line number
	col  int    // 1's based column number
}

// The following methods implement the Node interface for Statement.
// A generic Statement has no parent and no extensions.
func (s *Statement) NName() string           { return s.Argument }
func (s *Statement) Kind() string            { return s.Keyword }
func (s *Statement) Statement() *Statement   { return s }
func (s *Statement) ParentNode() Node        { return nil }
func (s *Statement) Exts() []*Statement      { return nil }

// Arg returns the optional argument to s.  It returns false if s has no
// argument.
func (s *Statement) Arg() (string, bool) { return s.Argument, s.HasArgument }

// SubStatements returns a slice of Statements found in s.
func (s *Statement) SubStatements() []*Statement { return s.statements }

// Location returns the location in the source where s was defined.
// Depending on which of file/line are known, the result is one of:
// "unknown", "line L:C", "FILE", or "FILE:L:C".
func (s *Statement) Location() string {
	switch {
	case s.file == "" && s.line == 0:
		return "unknown"
	case s.file == "":
		return fmt.Sprintf("line %d:%d", s.line, s.col)
	case s.line == 0:
		return s.file
	default:
		return fmt.Sprintf("%s:%d:%d", s.file, s.line, s.col)
	}
}

// Write writes the tree in s to w, each line indented by ident.  Children
// nodes are indented further by a tab.  Typically indent is "" at the top
// level.  Write is intended to display the contents of Statement, but
// not necessarily reproduce the input of Statement.
func (s *Statement) Write(w io.Writer, indent string) error {
	if s.Keyword == "" {
		// We are just a collection of statements at the top level.
		for _, s := range s.statements {
			if err := s.Write(w, indent); err != nil {
				return err
			}
		}
		return nil
	}

	parts := []string{fmt.Sprintf("%s%s", indent, s.Keyword)}
	if s.HasArgument {
		args := strings.Split(s.Argument, "\n")
		if len(args) == 1 {
			parts = append(parts, fmt.Sprintf(" %q", s.Argument))
		} else {
			// A multi-line argument: emit the first line after the opening
			// quote, then each following line aligned under the argument.
			// NOTE(review): args[0] is written raw while continuation lines
			// are %q-escaped (with the surrounding quotes stripped) — this
			// asymmetry appears intentional for display-only output, but
			// an embedded '"' in the first line would render unescaped.
			parts = append(parts, ` "`, args[0], "\n")
			// i is the padding that aligns continuation lines under the
			// first character of the argument (keyword width + one space).
			i := fmt.Sprintf("%*s", len(s.Keyword)+1, "")
			for x, p := range args[1:] {
				s := fmt.Sprintf("%q", p)
				s = s[1 : len(s)-1]
				parts = append(parts, indent, " ", i, s)
				if x == len(args[1:])-1 {
					// last part just needs the closing "
					parts = append(parts, `"`)
				} else {
					parts = append(parts, "\n")
				}
			}
		}
	}

	if len(s.statements) == 0 {
		// A statement with no body is terminated by a semicolon.
		_, err := fmt.Fprintf(w, "%s;\n", strings.Join(parts, ""))
		return err
	}
	if _, err := fmt.Fprintf(w, "%s {\n", strings.Join(parts, "")); err != nil {
		return err
	}
	for _, s := range s.statements {
		if err := s.Write(w, indent+"\t"); err != nil {
			return err
		}
	}
	if _, err := fmt.Fprintf(w, "%s}\n", indent); err != nil {
		return err
	}
	return nil
}

// ignoreMe is an error recovery token used by the parser in order
// to continue processing for other errors in the file.
var ignoreMe = &Statement{}

// Parse parses the input as generic YANG and returns the statements parsed.
// The path parameter should be the source name where input was read from (e.g.,
// the file name the input was read from).  If one or more errors are
// encountered, nil and an error are returned.  The error's text includes all
// errors encountered.
156 func Parse(input, path string) ([]*Statement, error) { 157 var statements []*Statement 158 p := &parser{ 159 lex: newLexer(input, path), 160 errout: &bytes.Buffer{}, 161 hitBrace: &Statement{}, 162 } 163 p.lex.errout = p.errout 164 Loop: 165 for { 166 switch ns := p.nextStatement(); ns { 167 case nil: 168 break Loop 169 case p.hitBrace: 170 fmt.Fprintf(p.errout, "%s:%d:%d: unexpected %c\n", ns.file, ns.line, ns.col, '}') 171 default: 172 statements = append(statements, ns) 173 } 174 } 175 176 p.checkStatementDepthIsZero() 177 178 if p.errout.Len() == 0 { 179 return statements, nil 180 } 181 return nil, errors.New(strings.TrimSpace(p.errout.String())) 182 } 183 184 // push pushes tokens t back on the input stream so they will be the next 185 // tokens returned by next. The tokens list is a LIFO so the final token 186 // listed to push will be the next token returned. 187 func (p *parser) push(t ...*token) { 188 p.tokens = append(p.tokens, t...) 189 } 190 191 // pop returns the last token pushed, or nil if the token stack is empty. 192 func (p *parser) pop() *token { 193 if n := len(p.tokens); n > 0 { 194 n-- 195 defer func() { p.tokens = p.tokens[:n] }() 196 return p.tokens[n] 197 } 198 return nil 199 } 200 201 // next returns the next token from the lexer. If the next token is a 202 // concatenated string, it returns the concatenated string as the token. 203 func (p *parser) next() *token { 204 if t := p.pop(); t != nil { 205 return t 206 } 207 // next returns the next unprocessed lexer token. 208 next := func() *token { 209 for { 210 if t := p.lex.NextToken(); t.Code() != tError { 211 return t 212 } 213 } 214 } 215 t := next() 216 if t.Code() != tString { 217 return t 218 } 219 // Process string concatenation (both single and double quote). 220 // See https://tools.ietf.org/html/rfc7950#section-6.1.3.1 221 // The lexer trimmed the quotes already. 
222 for { 223 nt := next() 224 switch nt.Code() { 225 case tEOF: 226 return t 227 case tUnquoted: 228 if nt.Text != "+" { 229 p.push(nt) 230 return t 231 } 232 default: 233 p.push(nt) 234 return t 235 } 236 // Invariant: nt is a + sign. 237 nnt := next() 238 switch nnt.Code() { 239 case tEOF: 240 p.push(nt) 241 return t 242 case tString: 243 // Accumulate the concatenation. 244 t.Text += nnt.Text 245 default: 246 p.push(nnt, nt) 247 return t 248 } 249 } 250 } 251 252 // nextStatement returns the next statement in the input, which may in turn 253 // recurse to read sub statements. 254 // nil is returned when EOF has been reached, or is reached halfway through 255 // parsing the next statement (with associated syntax errors printed to 256 // errout). 257 func (p *parser) nextStatement() *Statement { 258 t := p.next() 259 switch t.Code() { 260 case tEOF: 261 return nil 262 case '}': 263 p.statementDepth -= 1 264 p.hitBrace.file = t.File 265 p.hitBrace.line = t.Line 266 p.hitBrace.col = t.Col 267 return p.hitBrace 268 case tUnquoted: 269 default: 270 fmt.Fprintf(p.errout, "%v: keyword token not an unquoted string\n", t) 271 return ignoreMe 272 } 273 // Invariant: t represents a keyword token. 274 275 s := &Statement{ 276 Keyword: t.Text, 277 file: t.File, 278 line: t.Line, 279 col: t.Col, 280 } 281 282 // The keyword "pattern" must be treated specially. When 283 // parsing the argument for "pattern", escape sequences 284 // must be expanded differently. 285 p.lex.inPattern = t.Text == "pattern" 286 t = p.next() 287 p.lex.inPattern = false 288 switch t.Code() { 289 case tString, tUnquoted: 290 s.HasArgument = true 291 s.Argument = t.Text 292 t = p.next() 293 } 294 295 switch t.Code() { 296 case tEOF: 297 fmt.Fprintf(p.errout, "%s: unexpected EOF\n", s.file) 298 return nil 299 case ';': 300 return s 301 case '{': 302 p.statementDepth += 1 303 for { 304 switch ns := p.nextStatement(); ns { 305 case nil: 306 // Signal EOF reached. 
307 return nil 308 case p.hitBrace: 309 return s 310 default: 311 s.statements = append(s.statements, ns) 312 } 313 } 314 default: 315 fmt.Fprintf(p.errout, "%v: syntax error, expected ';' or '{'\n", t) 316 return ignoreMe 317 } 318 } 319 320 // checkStatementDepthIsZero checks that we aren't missing closing 321 // braces. Note: the parser will error out for the case where we 322 // start with an unmatched close brace, i.e. depth < 0 323 // 324 // This test should only be done if there are no other errors as 325 // we may exit early due to those errors -- and therefore there *might* 326 // not really be a mismatched brace issue. 327 func (p *parser) checkStatementDepthIsZero() { 328 if p.errout.Len() > 0 || p.statementDepth == 0 { 329 return 330 } 331 332 plural := "" 333 if p.statementDepth > 1 { 334 plural = "s" 335 } 336 fmt.Fprintf(p.errout, "%s:%d:%d: missing %d closing brace%s\n", 337 p.lex.file, p.lex.line, p.lex.col, p.statementDepth, plural) 338 }