k8s.io/client-go@v0.31.1/util/jsonpath/parser.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package jsonpath 18 19 import ( 20 "errors" 21 "fmt" 22 "regexp" 23 "strconv" 24 "strings" 25 "unicode" 26 "unicode/utf8" 27 ) 28 29 const eof = -1 30 31 const ( 32 leftDelim = "{" 33 rightDelim = "}" 34 ) 35 36 type Parser struct { 37 Name string 38 Root *ListNode 39 input string 40 pos int 41 start int 42 width int 43 } 44 45 var ( 46 ErrSyntax = errors.New("invalid syntax") 47 dictKeyRex = regexp.MustCompile(`^'([^']*)'$`) 48 sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:-?[\d]*)?$`) 49 ) 50 51 // Parse parsed the given text and return a node Parser. 52 // If an error is encountered, parsing stops and an empty 53 // Parser is returned with the error 54 func Parse(name, text string) (*Parser, error) { 55 p := NewParser(name) 56 err := p.Parse(text) 57 if err != nil { 58 p = nil 59 } 60 return p, err 61 } 62 63 func NewParser(name string) *Parser { 64 return &Parser{ 65 Name: name, 66 } 67 } 68 69 // parseAction parsed the expression inside delimiter 70 func parseAction(name, text string) (*Parser, error) { 71 p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim)) 72 // when error happens, p will be nil, so we need to return here 73 if err != nil { 74 return p, err 75 } 76 p.Root = p.Root.Nodes[0].(*ListNode) 77 return p, nil 78 } 79 80 func (p *Parser) Parse(text string) error { 81 p.input = text 82 p.Root = newList() 83 p.pos = 0 84 return p.parseText(p.Root) 85 } 86 87 // consumeText return the parsed text since last cosumeText 88 func (p *Parser) consumeText() string { 89 value := p.input[p.start:p.pos] 90 p.start = p.pos 91 return value 92 } 93 94 // next returns the next rune in the input. 95 func (p *Parser) next() rune { 96 if p.pos >= len(p.input) { 97 p.width = 0 98 return eof 99 } 100 r, w := utf8.DecodeRuneInString(p.input[p.pos:]) 101 p.width = w 102 p.pos += p.width 103 return r 104 } 105 106 // peek returns but does not consume the next rune in the input. 107 func (p *Parser) peek() rune { 108 r := p.next() 109 p.backup() 110 return r 111 } 112 113 // backup steps back one rune. Can only be called once per call of next. 114 func (p *Parser) backup() { 115 p.pos -= p.width 116 } 117 118 func (p *Parser) parseText(cur *ListNode) error { 119 for { 120 if strings.HasPrefix(p.input[p.pos:], leftDelim) { 121 if p.pos > p.start { 122 cur.append(newText(p.consumeText())) 123 } 124 return p.parseLeftDelim(cur) 125 } 126 if p.next() == eof { 127 break 128 } 129 } 130 // Correctly reached EOF. 131 if p.pos > p.start { 132 cur.append(newText(p.consumeText())) 133 } 134 return nil 135 } 136 137 // parseLeftDelim scans the left delimiter, which is known to be present. 138 func (p *Parser) parseLeftDelim(cur *ListNode) error { 139 p.pos += len(leftDelim) 140 p.consumeText() 141 newNode := newList() 142 cur.append(newNode) 143 cur = newNode 144 return p.parseInsideAction(cur) 145 } 146 147 func (p *Parser) parseInsideAction(cur *ListNode) error { 148 prefixMap := map[string]func(*ListNode) error{ 149 rightDelim: p.parseRightDelim, 150 "[?(": p.parseFilter, 151 "..": p.parseRecursive, 152 } 153 for prefix, parseFunc := range prefixMap { 154 if strings.HasPrefix(p.input[p.pos:], prefix) { 155 return parseFunc(cur) 156 } 157 } 158 159 switch r := p.next(); { 160 case r == eof || isEndOfLine(r): 161 return fmt.Errorf("unclosed action") 162 case r == ' ': 163 p.consumeText() 164 case r == '@' || r == '$': //the current object, just pass it 165 p.consumeText() 166 case r == '[': 167 return p.parseArray(cur) 168 case r == '"' || r == '\'': 169 return p.parseQuote(cur, r) 170 case r == '.': 171 return p.parseField(cur) 172 case r == '+' || r == '-' || unicode.IsDigit(r): 173 p.backup() 174 return p.parseNumber(cur) 175 case isAlphaNumeric(r): 176 p.backup() 177 return p.parseIdentifier(cur) 178 default: 179 return fmt.Errorf("unrecognized character in action: %#U", r) 180 } 181 return p.parseInsideAction(cur) 182 } 183 184 // parseRightDelim scans the right delimiter, which is known to be present. 185 func (p *Parser) parseRightDelim(cur *ListNode) error { 186 p.pos += len(rightDelim) 187 p.consumeText() 188 return p.parseText(p.Root) 189 } 190 191 // parseIdentifier scans build-in keywords, like "range" "end" 192 func (p *Parser) parseIdentifier(cur *ListNode) error { 193 var r rune 194 for { 195 r = p.next() 196 if isTerminator(r) { 197 p.backup() 198 break 199 } 200 } 201 value := p.consumeText() 202 203 if isBool(value) { 204 v, err := strconv.ParseBool(value) 205 if err != nil { 206 return fmt.Errorf("can not parse bool '%s': %s", value, err.Error()) 207 } 208 209 cur.append(newBool(v)) 210 } else { 211 cur.append(newIdentifier(value)) 212 } 213 214 return p.parseInsideAction(cur) 215 } 216 217 // parseRecursive scans the recursive descent operator .. 218 func (p *Parser) parseRecursive(cur *ListNode) error { 219 if lastIndex := len(cur.Nodes) - 1; lastIndex >= 0 && cur.Nodes[lastIndex].Type() == NodeRecursive { 220 return fmt.Errorf("invalid multiple recursive descent") 221 } 222 p.pos += len("..") 223 p.consumeText() 224 cur.append(newRecursive()) 225 if r := p.peek(); isAlphaNumeric(r) { 226 return p.parseField(cur) 227 } 228 return p.parseInsideAction(cur) 229 } 230 231 // parseNumber scans number 232 func (p *Parser) parseNumber(cur *ListNode) error { 233 r := p.peek() 234 if r == '+' || r == '-' { 235 p.next() 236 } 237 for { 238 r = p.next() 239 if r != '.' && !unicode.IsDigit(r) { 240 p.backup() 241 break 242 } 243 } 244 value := p.consumeText() 245 i, err := strconv.Atoi(value) 246 if err == nil { 247 cur.append(newInt(i)) 248 return p.parseInsideAction(cur) 249 } 250 d, err := strconv.ParseFloat(value, 64) 251 if err == nil { 252 cur.append(newFloat(d)) 253 return p.parseInsideAction(cur) 254 } 255 return fmt.Errorf("cannot parse number %s", value) 256 } 257 258 // parseArray scans array index selection 259 func (p *Parser) parseArray(cur *ListNode) error { 260 Loop: 261 for { 262 switch p.next() { 263 case eof, '\n': 264 return fmt.Errorf("unterminated array") 265 case ']': 266 break Loop 267 } 268 } 269 text := p.consumeText() 270 text = text[1 : len(text)-1] 271 if text == "*" { 272 text = ":" 273 } 274 275 //union operator 276 strs := strings.Split(text, ",") 277 if len(strs) > 1 { 278 union := []*ListNode{} 279 for _, str := range strs { 280 parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " "))) 281 if err != nil { 282 return err 283 } 284 union = append(union, parser.Root) 285 } 286 cur.append(newUnion(union)) 287 return p.parseInsideAction(cur) 288 } 289 290 // dict key 291 value := dictKeyRex.FindStringSubmatch(text) 292 if value != nil { 293 parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1])) 294 if err != nil { 295 return err 296 } 297 for _, node := range parser.Root.Nodes { 298 cur.append(node) 299 } 300 return p.parseInsideAction(cur) 301 } 302 303 //slice operator 304 value = sliceOperatorRex.FindStringSubmatch(text) 305 if value == nil { 306 return fmt.Errorf("invalid array index %s", text) 307 } 308 value = value[1:] 309 params := [3]ParamsEntry{} 310 for i := 0; i < 3; i++ { 311 if value[i] != "" { 312 if i > 0 { 313 value[i] = value[i][1:] 314 } 315 if i > 0 && value[i] == "" { 316 params[i].Known = false 317 } else { 318 var err error 319 params[i].Known = true 320 params[i].Value, err = strconv.Atoi(value[i]) 321 if err != nil { 322 return fmt.Errorf("array index %s is not a number", value[i]) 323 } 324 } 325 } else { 326 if i == 1 { 327 params[i].Known = true 328 params[i].Value = params[0].Value + 1 329 params[i].Derived = true 330 } else { 331 params[i].Known = false 332 params[i].Value = 0 333 } 334 } 335 } 336 cur.append(newArray(params)) 337 return p.parseInsideAction(cur) 338 } 339 340 // parseFilter scans filter inside array selection 341 func (p *Parser) parseFilter(cur *ListNode) error { 342 p.pos += len("[?(") 343 p.consumeText() 344 begin := false 345 end := false 346 var pair rune 347 348 Loop: 349 for { 350 r := p.next() 351 switch r { 352 case eof, '\n': 353 return fmt.Errorf("unterminated filter") 354 case '"', '\'': 355 if begin == false { 356 //save the paired rune 357 begin = true 358 pair = r 359 continue 360 } 361 //only add when met paired rune 362 if p.input[p.pos-2] != '\\' && r == pair { 363 end = true 364 } 365 case ')': 366 //in rightParser below quotes only appear zero or once 367 //and must be paired at the beginning and end 368 if begin == end { 369 break Loop 370 } 371 } 372 } 373 if p.next() != ']' { 374 return fmt.Errorf("unclosed array expect ]") 375 } 376 reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`) 377 text := p.consumeText() 378 text = text[:len(text)-2] 379 value := reg.FindStringSubmatch(text) 380 if value == nil { 381 parser, err := parseAction("text", text) 382 if err != nil { 383 return err 384 } 385 cur.append(newFilter(parser.Root, newList(), "exists")) 386 } else { 387 leftParser, err := parseAction("left", value[1]) 388 if err != nil { 389 return err 390 } 391 rightParser, err := parseAction("right", value[3]) 392 if err != nil { 393 return err 394 } 395 cur.append(newFilter(leftParser.Root, rightParser.Root, value[2])) 396 } 397 return p.parseInsideAction(cur) 398 } 399 400 // parseQuote unquotes string inside double or single quote 401 func (p *Parser) parseQuote(cur *ListNode, end rune) error { 402 Loop: 403 for { 404 switch p.next() { 405 case eof, '\n': 406 return fmt.Errorf("unterminated quoted string") 407 case end: 408 //if it's not escape break the Loop 409 if p.input[p.pos-2] != '\\' { 410 break Loop 411 } 412 } 413 } 414 value := p.consumeText() 415 s, err := UnquoteExtend(value) 416 if err != nil { 417 return fmt.Errorf("unquote string %s error %v", value, err) 418 } 419 cur.append(newText(s)) 420 return p.parseInsideAction(cur) 421 } 422 423 // parseField scans a field until a terminator 424 func (p *Parser) parseField(cur *ListNode) error { 425 p.consumeText() 426 for p.advance() { 427 } 428 value := p.consumeText() 429 if value == "*" { 430 cur.append(newWildcard()) 431 } else { 432 cur.append(newField(strings.Replace(value, "\\", "", -1))) 433 } 434 return p.parseInsideAction(cur) 435 } 436 437 // advance scans until next non-escaped terminator 438 func (p *Parser) advance() bool { 439 r := p.next() 440 if r == '\\' { 441 p.next() 442 } else if isTerminator(r) { 443 p.backup() 444 return false 445 } 446 return true 447 } 448 449 // isTerminator reports whether the input is at valid termination character to appear after an identifier. 450 func isTerminator(r rune) bool { 451 if isSpace(r) || isEndOfLine(r) { 452 return true 453 } 454 switch r { 455 case eof, '.', ',', '[', ']', '$', '@', '{', '}': 456 return true 457 } 458 return false 459 } 460 461 // isSpace reports whether r is a space character. 462 func isSpace(r rune) bool { 463 return r == ' ' || r == '\t' 464 } 465 466 // isEndOfLine reports whether r is an end-of-line character. 467 func isEndOfLine(r rune) bool { 468 return r == '\r' || r == '\n' 469 } 470 471 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 472 func isAlphaNumeric(r rune) bool { 473 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 474 } 475 476 // isBool reports whether s is a boolean value. 477 func isBool(s string) bool { 478 return s == "true" || s == "false" 479 } 480 481 // UnquoteExtend is almost same as strconv.Unquote(), but it support parse single quotes as a string 482 func UnquoteExtend(s string) (string, error) { 483 n := len(s) 484 if n < 2 { 485 return "", ErrSyntax 486 } 487 quote := s[0] 488 if quote != s[n-1] { 489 return "", ErrSyntax 490 } 491 s = s[1 : n-1] 492 493 if quote != '"' && quote != '\'' { 494 return "", ErrSyntax 495 } 496 497 // Is it trivial? Avoid allocation. 498 if !contains(s, '\\') && !contains(s, quote) { 499 return s, nil 500 } 501 502 var runeTmp [utf8.UTFMax]byte 503 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 504 for len(s) > 0 { 505 c, multibyte, ss, err := strconv.UnquoteChar(s, quote) 506 if err != nil { 507 return "", err 508 } 509 s = ss 510 if c < utf8.RuneSelf || !multibyte { 511 buf = append(buf, byte(c)) 512 } else { 513 n := utf8.EncodeRune(runeTmp[:], c) 514 buf = append(buf, runeTmp[:n]...) 515 } 516 } 517 return string(buf), nil 518 } 519 520 func contains(s string, c byte) bool { 521 for i := 0; i < len(s); i++ { 522 if s[i] == c { 523 return true 524 } 525 } 526 return false 527 }