github.com/XiaoMi/Gaea@v1.2.5/parser/tidb-types/json/path_expr.go (about) 1 // Copyright 2017 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package json 15 16 import ( 17 "regexp" 18 "strconv" 19 "strings" 20 21 "github.com/pingcap/errors" 22 ) 23 24 /* 25 From MySQL 5.7, JSON path expression grammar: 26 pathExpression ::= scope (pathLeg)* 27 scope ::= [ columnReference ] '$' 28 columnReference ::= // omit... 29 pathLeg ::= member | arrayLocation | '**' 30 member ::= '.' (keyName | '*') 31 arrayLocation ::= '[' (non-negative-integer | '*') ']' 32 keyName ::= ECMAScript-identifier | ECMAScript-string-literal 33 34 And some implementation limits in MySQL 5.7: 35 1) columnReference in scope must be empty now; 36 2) double asterisk(**) could not be last leg; 37 38 Examples: 39 select json_extract('{"a": "b", "c": [1, "2"]}', '$.a') -> "b" 40 select json_extract('{"a": "b", "c": [1, "2"]}', '$.c') -> [1, "2"] 41 select json_extract('{"a": "b", "c": [1, "2"]}', '$.a', '$.c') -> ["b", [1, "2"]] 42 select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[0]') -> 1 43 select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[2]') -> NULL 44 select json_extract('{"a": "b", "c": [1, "2"]}', '$.c[*]') -> [1, "2"] 45 select json_extract('{"a": "b", "c": [1, "2"]}', '$.*') -> ["b", [1, "2"]] 46 */ 47 48 // [a-zA-Z_][a-zA-Z0-9_]* matches any identifier; 49 // "[^"\\]*(\\.[^"\\]*)*" matches any string literal which can carry escaped quotes; 50 var jsonPathExprLegRe = regexp.MustCompile(`(\.\s*([a-zA-Z_][a-zA-Z0-9_]*|\*|"[^"\\]*(\\.[^"\\]*)*")|(\[\s*([0-9]+|\*)\s*\])|\*\*)`) 51 52 type pathLegType byte 53 54 const ( 55 // pathLegKey indicates the path leg with '.key'. 56 pathLegKey pathLegType = 0x01 57 // pathLegIndex indicates the path leg with form '[number]'. 58 pathLegIndex pathLegType = 0x02 59 // pathLegDoubleAsterisk indicates the path leg with form '**'. 60 pathLegDoubleAsterisk pathLegType = 0x03 61 ) 62 63 // pathLeg is only used by PathExpression. 64 type pathLeg struct { 65 typ pathLegType 66 arrayIndex int // if typ is pathLegIndex, the value should be parsed into here. 67 dotKey string // if typ is pathLegKey, the key should be parsed into here. 68 } 69 70 // arrayIndexAsterisk is for parsing `*` into a number. 71 // we need this number represent "all". 72 const arrayIndexAsterisk = -1 73 74 // pathExpressionFlag holds attributes of PathExpression 75 type pathExpressionFlag byte 76 77 const ( 78 pathExpressionContainsAsterisk pathExpressionFlag = 0x01 79 pathExpressionContainsDoubleAsterisk pathExpressionFlag = 0x02 80 ) 81 82 // containsAnyAsterisk returns true if pef contains any asterisk. 83 func (pef pathExpressionFlag) containsAnyAsterisk() bool { 84 pef &= pathExpressionContainsAsterisk | pathExpressionContainsDoubleAsterisk 85 return byte(pef) != 0 86 } 87 88 // PathExpression is for JSON path expression. 89 type PathExpression struct { 90 legs []pathLeg 91 flags pathExpressionFlag 92 } 93 94 // popOneLeg returns a pathLeg, and a child PathExpression without that leg. 95 func (pe PathExpression) popOneLeg() (pathLeg, PathExpression) { 96 newPe := PathExpression{ 97 legs: pe.legs[1:], 98 flags: 0, 99 } 100 for _, leg := range newPe.legs { 101 if leg.typ == pathLegIndex && leg.arrayIndex == -1 { 102 newPe.flags |= pathExpressionContainsAsterisk 103 } else if leg.typ == pathLegKey && leg.dotKey == "*" { 104 newPe.flags |= pathExpressionContainsAsterisk 105 } else if leg.typ == pathLegDoubleAsterisk { 106 newPe.flags |= pathExpressionContainsDoubleAsterisk 107 } 108 } 109 return pe.legs[0], newPe 110 } 111 112 // popOneLastLeg returns the a parent PathExpression and the last pathLeg 113 func (pe PathExpression) popOneLastLeg() (PathExpression, pathLeg) { 114 lastLegIdx := len(pe.legs) - 1 115 lastLeg := pe.legs[lastLegIdx] 116 // It is used only in modification, it has been checked that there is no asterisks. 117 return PathExpression{legs: pe.legs[:lastLegIdx]}, lastLeg 118 } 119 120 // ContainsAnyAsterisk returns true if pe contains any asterisk. 121 func (pe PathExpression) ContainsAnyAsterisk() bool { 122 return pe.flags.containsAnyAsterisk() 123 } 124 125 // ParseJSONPathExpr parses a JSON path expression. Returns a PathExpression 126 // object which can be used in JSON_EXTRACT, JSON_SET and so on. 127 func ParseJSONPathExpr(pathExpr string) (pe PathExpression, err error) { 128 // Find the position of first '$'. If any no-blank characters in 129 // pathExpr[0: dollarIndex), return an ErrInvalidJSONPath error. 130 dollarIndex := strings.Index(pathExpr, "$") 131 if dollarIndex < 0 { 132 err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr) 133 return 134 } 135 for i := 0; i < dollarIndex; i++ { 136 if !isBlank(rune(pathExpr[i])) { 137 err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr) 138 return 139 } 140 } 141 142 pathExprSuffix := strings.TrimFunc(pathExpr[dollarIndex+1:], isBlank) 143 indices := jsonPathExprLegRe.FindAllStringIndex(pathExprSuffix, -1) 144 if len(indices) == 0 && len(pathExprSuffix) != 0 { 145 err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr) 146 return 147 } 148 149 pe.legs = make([]pathLeg, 0, len(indices)) 150 pe.flags = pathExpressionFlag(0) 151 152 lastEnd := 0 153 for _, indice := range indices { 154 start, end := indice[0], indice[1] 155 156 // Check all characters between two legs are blank. 157 for i := lastEnd; i < start; i++ { 158 if !isBlank(rune(pathExprSuffix[i])) { 159 err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr) 160 return 161 } 162 } 163 lastEnd = end 164 165 if pathExprSuffix[start] == '[' { 166 // The leg is an index of a JSON array. 167 var leg = strings.TrimFunc(pathExprSuffix[start+1:end], isBlank) 168 var indexStr = strings.TrimFunc(leg[0:len(leg)-1], isBlank) 169 var index int 170 if len(indexStr) == 1 && indexStr[0] == '*' { 171 pe.flags |= pathExpressionContainsAsterisk 172 index = arrayIndexAsterisk 173 } else { 174 if index, err = strconv.Atoi(indexStr); err != nil { 175 err = errors.Trace(err) 176 return 177 } 178 } 179 pe.legs = append(pe.legs, pathLeg{typ: pathLegIndex, arrayIndex: index}) 180 } else if pathExprSuffix[start] == '.' { 181 // The leg is a key of a JSON object. 182 var key = strings.TrimFunc(pathExprSuffix[start+1:end], isBlank) 183 if len(key) == 1 && key[0] == '*' { 184 pe.flags |= pathExpressionContainsAsterisk 185 } else if key[0] == '"' { 186 // We need unquote the origin string. 187 if key, err = unquoteString(key[1 : len(key)-1]); err != nil { 188 err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr) 189 return 190 } 191 } 192 pe.legs = append(pe.legs, pathLeg{typ: pathLegKey, dotKey: key}) 193 } else { 194 // The leg is '**'. 195 pe.flags |= pathExpressionContainsDoubleAsterisk 196 pe.legs = append(pe.legs, pathLeg{typ: pathLegDoubleAsterisk}) 197 } 198 } 199 if len(pe.legs) > 0 { 200 // The last leg of a path expression cannot be '**'. 201 if pe.legs[len(pe.legs)-1].typ == pathLegDoubleAsterisk { 202 err = ErrInvalidJSONPath.GenWithStackByArgs(pathExpr) 203 return 204 } 205 } 206 return 207 } 208 209 func isBlank(c rune) bool { 210 if c == '\n' || c == '\r' || c == '\t' || c == ' ' { 211 return true 212 } 213 return false 214 }