github.com/XiaoMi/Gaea@v1.2.5/parser/tidb-types/json/path_expr.go (about)

     1  // Copyright 2017 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package json
    15  
    16  import (
    17  	"regexp"
    18  	"strconv"
    19  	"strings"
    20  
    21  	"github.com/pingcap/errors"
    22  )
    23  
    24  /*
    25  	From MySQL 5.7, JSON path expression grammar:
    26  		pathExpression ::= scope (pathLeg)*
    27  		scope ::= [ columnReference ] '$'
    28  		columnReference ::= // omit...
    29  		pathLeg ::= member | arrayLocation | '**'
    30  		member ::= '.' (keyName | '*')
    31  		arrayLocation ::= '[' (non-negative-integer | '*') ']'
    32  		keyName ::= ECMAScript-identifier | ECMAScript-string-literal
    33  
    34  	And some implementation limits in MySQL 5.7:
    35  		1) columnReference in scope must be empty now;
    36  		2) double asterisk(**) could not be last leg;
    37  
    38  	Examples:
    39  		select json_extract('{"a": "b", "c": [1, "2"]}', '$.a') -> "b"
    40  		select json_extract('{"a": "b", "c": [1, "2"]}', '$.c') -> [1, "2"]
    41  		select json_extract('{"a": "b", "c": [1, "2"]}', '$.a', '$.c') -> ["b", [1, "2"]]
    42  		select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[0]') -> 1
    43  		select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[2]') -> NULL
    44  		select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[*]') -> [1, "2"]
    45  		select json_extract('{"a": "b", "c": [1, "2"]}', '$.*') -> ["b", [1, "2"]]
    46  */
    47  
// jsonPathExprLegRe matches one path leg. Its three alternatives are:
//   - `\.\s*(identifier|\*|string-literal)`: a member leg, in which
//     [a-zA-Z_][a-zA-Z0-9_]* matches any identifier and
//     "[^"\\]*(\\.[^"\\]*)*" matches any string literal which can carry escaped quotes;
//   - `\[\s*([0-9]+|\*)\s*\]`: an array leg holding a non-negative integer or `*`;
//   - `\*\*`: the double-asterisk leg.
var jsonPathExprLegRe = regexp.MustCompile(`(\.\s*([a-zA-Z_][a-zA-Z0-9_]*|\*|"[^"\\]*(\\.[^"\\]*)*")|(\[\s*([0-9]+|\*)\s*\])|\*\*)`)
    51  
    52  type pathLegType byte
    53  
    54  const (
    55  	// pathLegKey indicates the path leg with '.key'.
    56  	pathLegKey pathLegType = 0x01
    57  	// pathLegIndex indicates the path leg with form '[number]'.
    58  	pathLegIndex pathLegType = 0x02
    59  	// pathLegDoubleAsterisk indicates the path leg with form '**'.
    60  	pathLegDoubleAsterisk pathLegType = 0x03
    61  )
    62  
// pathLeg is only used by PathExpression.
// It describes one leg of a parsed path; typ selects which of the other
// fields is meaningful.
type pathLeg struct {
	typ        pathLegType
	arrayIndex int    // if typ is pathLegIndex, the value should be parsed into here; arrayIndexAsterisk stands for `[*]`.
	dotKey     string // if typ is pathLegKey, the key should be parsed into here; "*" stands for `.*`.
}
    69  
// arrayIndexAsterisk is the number that `*` is parsed into for an array leg.
// Indices written in a path are non-negative, so this negative sentinel is
// free to represent "all" elements.
const arrayIndexAsterisk = -1
    73  
    74  // pathExpressionFlag holds attributes of PathExpression
    75  type pathExpressionFlag byte
    76  
    77  const (
    78  	pathExpressionContainsAsterisk       pathExpressionFlag = 0x01
    79  	pathExpressionContainsDoubleAsterisk pathExpressionFlag = 0x02
    80  )
    81  
    82  // containsAnyAsterisk returns true if pef contains any asterisk.
    83  func (pef pathExpressionFlag) containsAnyAsterisk() bool {
    84  	pef &= pathExpressionContainsAsterisk | pathExpressionContainsDoubleAsterisk
    85  	return byte(pef) != 0
    86  }
    87  
// PathExpression is for JSON path expression.
// Values are produced by ParseJSONPathExpr.
type PathExpression struct {
	legs  []pathLeg          // the legs, in the order they follow the '$' scope.
	flags pathExpressionFlag // records whether any leg is a '*' or '**' wildcard.
}
    93  
    94  // popOneLeg returns a pathLeg, and a child PathExpression without that leg.
    95  func (pe PathExpression) popOneLeg() (pathLeg, PathExpression) {
    96  	newPe := PathExpression{
    97  		legs:  pe.legs[1:],
    98  		flags: 0,
    99  	}
   100  	for _, leg := range newPe.legs {
   101  		if leg.typ == pathLegIndex && leg.arrayIndex == -1 {
   102  			newPe.flags |= pathExpressionContainsAsterisk
   103  		} else if leg.typ == pathLegKey && leg.dotKey == "*" {
   104  			newPe.flags |= pathExpressionContainsAsterisk
   105  		} else if leg.typ == pathLegDoubleAsterisk {
   106  			newPe.flags |= pathExpressionContainsDoubleAsterisk
   107  		}
   108  	}
   109  	return pe.legs[0], newPe
   110  }
   111  
   112  // popOneLastLeg returns the a parent PathExpression and the last pathLeg
   113  func (pe PathExpression) popOneLastLeg() (PathExpression, pathLeg) {
   114  	lastLegIdx := len(pe.legs) - 1
   115  	lastLeg := pe.legs[lastLegIdx]
   116  	// It is used only in modification, it has been checked that there is no asterisks.
   117  	return PathExpression{legs: pe.legs[:lastLegIdx]}, lastLeg
   118  }
   119  
// ContainsAnyAsterisk returns true if pe contains any asterisk, that is, if
// its flags record a '*' or a '**' leg.
func (pe PathExpression) ContainsAnyAsterisk() bool {
	return pe.flags.containsAnyAsterisk()
}
   124  
   125  // ParseJSONPathExpr parses a JSON path expression. Returns a PathExpression
   126  // object which can be used in JSON_EXTRACT, JSON_SET and so on.
   127  func ParseJSONPathExpr(pathExpr string) (pe PathExpression, err error) {
   128  	// Find the position of first '$'. If any no-blank characters in
   129  	// pathExpr[0: dollarIndex), return an ErrInvalidJSONPath error.
   130  	dollarIndex := strings.Index(pathExpr, "$")
   131  	if dollarIndex < 0 {
   132  		err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
   133  		return
   134  	}
   135  	for i := 0; i < dollarIndex; i++ {
   136  		if !isBlank(rune(pathExpr[i])) {
   137  			err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
   138  			return
   139  		}
   140  	}
   141  
   142  	pathExprSuffix := strings.TrimFunc(pathExpr[dollarIndex+1:], isBlank)
   143  	indices := jsonPathExprLegRe.FindAllStringIndex(pathExprSuffix, -1)
   144  	if len(indices) == 0 && len(pathExprSuffix) != 0 {
   145  		err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
   146  		return
   147  	}
   148  
   149  	pe.legs = make([]pathLeg, 0, len(indices))
   150  	pe.flags = pathExpressionFlag(0)
   151  
   152  	lastEnd := 0
   153  	for _, indice := range indices {
   154  		start, end := indice[0], indice[1]
   155  
   156  		// Check all characters between two legs are blank.
   157  		for i := lastEnd; i < start; i++ {
   158  			if !isBlank(rune(pathExprSuffix[i])) {
   159  				err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
   160  				return
   161  			}
   162  		}
   163  		lastEnd = end
   164  
   165  		if pathExprSuffix[start] == '[' {
   166  			// The leg is an index of a JSON array.
   167  			var leg = strings.TrimFunc(pathExprSuffix[start+1:end], isBlank)
   168  			var indexStr = strings.TrimFunc(leg[0:len(leg)-1], isBlank)
   169  			var index int
   170  			if len(indexStr) == 1 && indexStr[0] == '*' {
   171  				pe.flags |= pathExpressionContainsAsterisk
   172  				index = arrayIndexAsterisk
   173  			} else {
   174  				if index, err = strconv.Atoi(indexStr); err != nil {
   175  					err = errors.Trace(err)
   176  					return
   177  				}
   178  			}
   179  			pe.legs = append(pe.legs, pathLeg{typ: pathLegIndex, arrayIndex: index})
   180  		} else if pathExprSuffix[start] == '.' {
   181  			// The leg is a key of a JSON object.
   182  			var key = strings.TrimFunc(pathExprSuffix[start+1:end], isBlank)
   183  			if len(key) == 1 && key[0] == '*' {
   184  				pe.flags |= pathExpressionContainsAsterisk
   185  			} else if key[0] == '"' {
   186  				// We need unquote the origin string.
   187  				if key, err = unquoteString(key[1 : len(key)-1]); err != nil {
   188  					err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
   189  					return
   190  				}
   191  			}
   192  			pe.legs = append(pe.legs, pathLeg{typ: pathLegKey, dotKey: key})
   193  		} else {
   194  			// The leg is '**'.
   195  			pe.flags |= pathExpressionContainsDoubleAsterisk
   196  			pe.legs = append(pe.legs, pathLeg{typ: pathLegDoubleAsterisk})
   197  		}
   198  	}
   199  	if len(pe.legs) > 0 {
   200  		// The last leg of a path expression cannot be '**'.
   201  		if pe.legs[len(pe.legs)-1].typ == pathLegDoubleAsterisk {
   202  			err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr)
   203  			return
   204  		}
   205  	}
   206  	return
   207  }
   208  
   209  func isBlank(c rune) bool {
   210  	if c == '\n' || c == '\r' || c == '\t' || c == ' ' {
   211  		return true
   212  	}
   213  	return false
   214  }