github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/k8s/jsonpath/parser.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package jsonpath
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"regexp"
    23  	"strconv"
    24  	"strings"
    25  	"unicode"
    26  	"unicode/utf8"
    27  )
    28  
    29  const eof = -1
    30  
    31  const (
    32  	leftDelim  = "{"
    33  	rightDelim = "}"
    34  )
    35  
    36  type Parser struct {
    37  	Name  string
    38  	Root  *ListNode
    39  	input string
    40  	pos   int
    41  	start int
    42  	width int
    43  }
    44  
    45  var (
    46  	ErrSyntax        = errors.New("invalid syntax")
    47  	dictKeyRex       = regexp.MustCompile(`^'([^']*)'$`)
    48  	sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:-?[\d]*)?$`)
    49  )
    50  
    51  // Parse parsed the given text and return a node Parser.
    52  // If an error is encountered, parsing stops and an empty
    53  // Parser is returned with the error
    54  func Parse(name, text string) (*Parser, error) {
    55  	p := NewParser(name)
    56  	err := p.Parse(text)
    57  	if err != nil {
    58  		p = nil
    59  	}
    60  	return p, err
    61  }
    62  
    63  func NewParser(name string) *Parser {
    64  	return &Parser{
    65  		Name: name,
    66  	}
    67  }
    68  
    69  // parseAction parsed the expression inside delimiter
    70  func parseAction(name, text string) (*Parser, error) {
    71  	p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim))
    72  	// when error happens, p will be nil, so we need to return here
    73  	if err != nil {
    74  		return p, err
    75  	}
    76  	p.Root = p.Root.Nodes[0].(*ListNode)
    77  	return p, nil
    78  }
    79  
    80  func (p *Parser) Parse(text string) error {
    81  	p.input = text
    82  	p.Root = newList()
    83  	p.pos = 0
    84  	return p.parseText(p.Root)
    85  }
    86  
    87  // consumeText return the parsed text since last cosumeText
    88  func (p *Parser) consumeText() string {
    89  	value := p.input[p.start:p.pos]
    90  	p.start = p.pos
    91  	return value
    92  }
    93  
    94  // next returns the next rune in the input.
    95  func (p *Parser) next() rune {
    96  	if p.pos >= len(p.input) {
    97  		p.width = 0
    98  		return eof
    99  	}
   100  	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
   101  	p.width = w
   102  	p.pos += p.width
   103  	return r
   104  }
   105  
   106  // peek returns but does not consume the next rune in the input.
   107  func (p *Parser) peek() rune {
   108  	r := p.next()
   109  	p.backup()
   110  	return r
   111  }
   112  
   113  // backup steps back one rune. Can only be called once per call of next.
   114  func (p *Parser) backup() {
   115  	p.pos -= p.width
   116  }
   117  
   118  func (p *Parser) parseText(cur *ListNode) error {
   119  	for {
   120  		if strings.HasPrefix(p.input[p.pos:], leftDelim) {
   121  			if p.pos > p.start {
   122  				cur.append(newText(p.consumeText()))
   123  			}
   124  			return p.parseLeftDelim(cur)
   125  		}
   126  		if p.next() == eof {
   127  			break
   128  		}
   129  	}
   130  	// Correctly reached EOF.
   131  	if p.pos > p.start {
   132  		cur.append(newText(p.consumeText()))
   133  	}
   134  	return nil
   135  }
   136  
   137  // parseLeftDelim scans the left delimiter, which is known to be present.
   138  func (p *Parser) parseLeftDelim(cur *ListNode) error {
   139  	p.pos += len(leftDelim)
   140  	p.consumeText()
   141  	newNode := newList()
   142  	cur.append(newNode)
   143  	cur = newNode
   144  	return p.parseInsideAction(cur)
   145  }
   146  
   147  func (p *Parser) parseInsideAction(cur *ListNode) error {
   148  	prefixMap := map[string]func(*ListNode) error{
   149  		rightDelim: p.parseRightDelim,
   150  		"[?(":      p.parseFilter,
   151  		"..":       p.parseRecursive,
   152  	}
   153  	for prefix, parseFunc := range prefixMap {
   154  		if strings.HasPrefix(p.input[p.pos:], prefix) {
   155  			return parseFunc(cur)
   156  		}
   157  	}
   158  
   159  	switch r := p.next(); {
   160  	case r == eof || isEndOfLine(r):
   161  		return fmt.Errorf("unclosed action")
   162  	case r == ' ':
   163  		p.consumeText()
   164  	case r == '@' || r == '$': //the current object, just pass it
   165  		p.consumeText()
   166  	case r == '[':
   167  		return p.parseArray(cur)
   168  	case r == '"' || r == '\'':
   169  		return p.parseQuote(cur, r)
   170  	case r == '.':
   171  		return p.parseField(cur)
   172  	case r == '+' || r == '-' || unicode.IsDigit(r):
   173  		p.backup()
   174  		return p.parseNumber(cur)
   175  	case isAlphaNumeric(r):
   176  		p.backup()
   177  		return p.parseIdentifier(cur)
   178  	default:
   179  		return fmt.Errorf("unrecognized character in action: %#U", r)
   180  	}
   181  	return p.parseInsideAction(cur)
   182  }
   183  
   184  // parseRightDelim scans the right delimiter, which is known to be present.
   185  func (p *Parser) parseRightDelim(cur *ListNode) error {
   186  	p.pos += len(rightDelim)
   187  	p.consumeText()
   188  	return p.parseText(p.Root)
   189  }
   190  
   191  // parseIdentifier scans build-in keywords, like "range" "end"
   192  func (p *Parser) parseIdentifier(cur *ListNode) error {
   193  	var r rune
   194  	for {
   195  		r = p.next()
   196  		if isTerminator(r) {
   197  			p.backup()
   198  			break
   199  		}
   200  	}
   201  	value := p.consumeText()
   202  
   203  	if isBool(value) {
   204  		v, err := strconv.ParseBool(value)
   205  		if err != nil {
   206  			return fmt.Errorf("can not parse bool '%s': %s", value, err.Error())
   207  		}
   208  
   209  		cur.append(newBool(v))
   210  	} else {
   211  		cur.append(newIdentifier(value))
   212  	}
   213  
   214  	return p.parseInsideAction(cur)
   215  }
   216  
   217  // parseRecursive scans the recursive desent operator ..
   218  func (p *Parser) parseRecursive(cur *ListNode) error {
   219  	p.pos += len("..")
   220  	p.consumeText()
   221  	cur.append(newRecursive())
   222  	if r := p.peek(); isAlphaNumeric(r) {
   223  		return p.parseField(cur)
   224  	}
   225  	return p.parseInsideAction(cur)
   226  }
   227  
   228  // parseNumber scans number
   229  func (p *Parser) parseNumber(cur *ListNode) error {
   230  	r := p.peek()
   231  	if r == '+' || r == '-' {
   232  		p.next()
   233  	}
   234  	for {
   235  		r = p.next()
   236  		if r != '.' && !unicode.IsDigit(r) {
   237  			p.backup()
   238  			break
   239  		}
   240  	}
   241  	value := p.consumeText()
   242  	i, err := strconv.Atoi(value)
   243  	if err == nil {
   244  		cur.append(newInt(i))
   245  		return p.parseInsideAction(cur)
   246  	}
   247  	d, err := strconv.ParseFloat(value, 64)
   248  	if err == nil {
   249  		cur.append(newFloat(d))
   250  		return p.parseInsideAction(cur)
   251  	}
   252  	return fmt.Errorf("cannot parse number %s", value)
   253  }
   254  
   255  // parseArray scans array index selection
   256  func (p *Parser) parseArray(cur *ListNode) error {
   257  Loop:
   258  	for {
   259  		switch p.next() {
   260  		case eof, '\n':
   261  			return fmt.Errorf("unterminated array")
   262  		case ']':
   263  			break Loop
   264  		}
   265  	}
   266  	text := p.consumeText()
   267  	text = text[1 : len(text)-1]
   268  	if text == "*" {
   269  		text = ":"
   270  	}
   271  
   272  	//union operator
   273  	strs := strings.Split(text, ",")
   274  	if len(strs) > 1 {
   275  		union := []*ListNode{}
   276  		for _, str := range strs {
   277  			parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " ")))
   278  			if err != nil {
   279  				return err
   280  			}
   281  			union = append(union, parser.Root)
   282  		}
   283  		cur.append(newUnion(union))
   284  		return p.parseInsideAction(cur)
   285  	}
   286  
   287  	// dict key
   288  	value := dictKeyRex.FindStringSubmatch(text)
   289  	if value != nil {
   290  		parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1]))
   291  		if err != nil {
   292  			return err
   293  		}
   294  		for _, node := range parser.Root.Nodes {
   295  			cur.append(node)
   296  		}
   297  		return p.parseInsideAction(cur)
   298  	}
   299  
   300  	//slice operator
   301  	value = sliceOperatorRex.FindStringSubmatch(text)
   302  	if value == nil {
   303  		return fmt.Errorf("invalid array index %s", text)
   304  	}
   305  	value = value[1:]
   306  	params := [3]ParamsEntry{}
   307  	for i := 0; i < 3; i++ {
   308  		if value[i] != "" {
   309  			if i > 0 {
   310  				value[i] = value[i][1:]
   311  			}
   312  			if i > 0 && value[i] == "" {
   313  				params[i].Known = false
   314  			} else {
   315  				var err error
   316  				params[i].Known = true
   317  				params[i].Value, err = strconv.Atoi(value[i])
   318  				if err != nil {
   319  					return fmt.Errorf("array index %s is not a number", value[i])
   320  				}
   321  			}
   322  		} else {
   323  			if i == 1 {
   324  				params[i].Known = true
   325  				params[i].Value = params[0].Value + 1
   326  				params[i].Derived = true
   327  			} else {
   328  				params[i].Known = false
   329  				params[i].Value = 0
   330  			}
   331  		}
   332  	}
   333  	cur.append(newArray(params))
   334  	return p.parseInsideAction(cur)
   335  }
   336  
   337  // parseFilter scans filter inside array selection
   338  func (p *Parser) parseFilter(cur *ListNode) error {
   339  	p.pos += len("[?(")
   340  	p.consumeText()
   341  	begin := false
   342  	end := false
   343  	var pair rune
   344  
   345  Loop:
   346  	for {
   347  		r := p.next()
   348  		switch r {
   349  		case eof, '\n':
   350  			return fmt.Errorf("unterminated filter")
   351  		case '"', '\'':
   352  			if !begin {
   353  				//save the paired rune
   354  				begin = true
   355  				pair = r
   356  				continue
   357  			}
   358  			//only add when met paired rune
   359  			if p.input[p.pos-2] != '\\' && r == pair {
   360  				end = true
   361  			}
   362  		case ')':
   363  			//in rightParser below quotes only appear zero or once
   364  			//and must be paired at the beginning and end
   365  			if begin == end {
   366  				break Loop
   367  			}
   368  		}
   369  	}
   370  	if p.next() != ']' {
   371  		return fmt.Errorf("unclosed array expect ]")
   372  	}
   373  	reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`)
   374  	text := p.consumeText()
   375  	text = text[:len(text)-2]
   376  	value := reg.FindStringSubmatch(text)
   377  	if value == nil {
   378  		parser, err := parseAction("text", text)
   379  		if err != nil {
   380  			return err
   381  		}
   382  		cur.append(newFilter(parser.Root, newList(), "exists"))
   383  	} else {
   384  		leftParser, err := parseAction("left", value[1])
   385  		if err != nil {
   386  			return err
   387  		}
   388  		rightParser, err := parseAction("right", value[3])
   389  		if err != nil {
   390  			return err
   391  		}
   392  		cur.append(newFilter(leftParser.Root, rightParser.Root, value[2]))
   393  	}
   394  	return p.parseInsideAction(cur)
   395  }
   396  
   397  // parseQuote unquotes string inside double or single quote
   398  func (p *Parser) parseQuote(cur *ListNode, end rune) error {
   399  Loop:
   400  	for {
   401  		switch p.next() {
   402  		case eof, '\n':
   403  			return fmt.Errorf("unterminated quoted string")
   404  		case end:
   405  			//if it's not escape break the Loop
   406  			if p.input[p.pos-2] != '\\' {
   407  				break Loop
   408  			}
   409  		}
   410  	}
   411  	value := p.consumeText()
   412  	s, err := UnquoteExtend(value)
   413  	if err != nil {
   414  		return fmt.Errorf("unquote string %s error %v", value, err)
   415  	}
   416  	cur.append(newText(s))
   417  	return p.parseInsideAction(cur)
   418  }
   419  
   420  // parseField scans a field until a terminator
   421  func (p *Parser) parseField(cur *ListNode) error {
   422  	p.consumeText()
   423  	for p.advance() {
   424  	}
   425  	value := p.consumeText()
   426  	if value == "*" {
   427  		cur.append(newWildcard())
   428  	} else {
   429  		cur.append(newField(strings.ReplaceAll(value, "\\", "")))
   430  	}
   431  	return p.parseInsideAction(cur)
   432  }
   433  
   434  // advance scans until next non-escaped terminator
   435  func (p *Parser) advance() bool {
   436  	r := p.next()
   437  	if r == '\\' {
   438  		p.next()
   439  	} else if isTerminator(r) {
   440  		p.backup()
   441  		return false
   442  	}
   443  	return true
   444  }
   445  
   446  // isTerminator reports whether the input is at valid termination character to appear after an identifier.
   447  func isTerminator(r rune) bool {
   448  	if isSpace(r) || isEndOfLine(r) {
   449  		return true
   450  	}
   451  	switch r {
   452  	case eof, '.', ',', '[', ']', '$', '@', '{', '}':
   453  		return true
   454  	}
   455  	return false
   456  }
   457  
   458  // isSpace reports whether r is a space character.
   459  func isSpace(r rune) bool {
   460  	return r == ' ' || r == '\t'
   461  }
   462  
   463  // isEndOfLine reports whether r is an end-of-line character.
   464  func isEndOfLine(r rune) bool {
   465  	return r == '\r' || r == '\n'
   466  }
   467  
   468  // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
   469  func isAlphaNumeric(r rune) bool {
   470  	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
   471  }
   472  
   473  // isBool reports whether s is a boolean value.
   474  func isBool(s string) bool {
   475  	return s == "true" || s == "false"
   476  }
   477  
   478  // UnquoteExtend is almost same as strconv.Unquote(), but it support parse single quotes as a string
   479  func UnquoteExtend(s string) (string, error) {
   480  	n := len(s)
   481  	if n < 2 {
   482  		return "", ErrSyntax
   483  	}
   484  	quote := s[0]
   485  	if quote != s[n-1] {
   486  		return "", ErrSyntax
   487  	}
   488  	s = s[1 : n-1]
   489  
   490  	if quote != '"' && quote != '\'' {
   491  		return "", ErrSyntax
   492  	}
   493  
   494  	// Is it trivial?  Avoid allocation.
   495  	if !contains(s, '\\') && !contains(s, quote) {
   496  		return s, nil
   497  	}
   498  
   499  	var runeTmp [utf8.UTFMax]byte
   500  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   501  	for len(s) > 0 {
   502  		c, multibyte, ss, err := strconv.UnquoteChar(s, quote)
   503  		if err != nil {
   504  			return "", err
   505  		}
   506  		s = ss
   507  		if c < utf8.RuneSelf || !multibyte {
   508  			buf = append(buf, byte(c))
   509  		} else {
   510  			n := utf8.EncodeRune(runeTmp[:], c)
   511  			buf = append(buf, runeTmp[:n]...)
   512  		}
   513  	}
   514  	return string(buf), nil
   515  }
   516  
   517  func contains(s string, c byte) bool {
   518  	for i := 0; i < len(s); i++ {
   519  		if s[i] == c {
   520  			return true
   521  		}
   522  	}
   523  	return false
   524  }