go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/proto/mask/parse.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mask 16 17 import ( 18 "fmt" 19 "strings" 20 "unicode" 21 22 "google.golang.org/protobuf/reflect/protoreflect" 23 ) 24 25 // path models the parsed path which consists of a slice of segments 26 type path []string 27 28 const pathDelimiter = '.' 29 30 // parsePath parses a path string to a slice of segments (See grammar in pkg 31 // doc). 32 // 33 // If isJSONName is true, parsing the field name using JSON field name instead 34 // of its canonical form. However, the result segments in path will use 35 // canonical field name. 36 func parsePath(rawPath string, descriptor protoreflect.MessageDescriptor, isJSONName bool) (path, error) { 37 ctx := &parseCtx{ 38 curDescriptor: descriptor, 39 } 40 return parsePathWithContext(rawPath, ctx, isJSONName) 41 } 42 43 func parsePathWithContext(rawPath string, ctx *parseCtx, isJSONName bool) (path, error) { 44 t := &tokenizer{ 45 path: rawPath, 46 delimiter: pathDelimiter, 47 } 48 ret := path{} 49 for t.hasMoreTokens() { 50 if tok, err := t.nextToken(); err != nil { 51 return nil, err 52 } else { 53 seg, err := parseSegment(tok, isJSONName, ctx) 54 if err != nil { 55 return nil, err 56 } 57 ret = append(ret, seg) 58 } 59 } 60 61 return ret, nil 62 } 63 64 // parseCtx defines context during path parsing 65 type parseCtx struct { 66 curDescriptor protoreflect.MessageDescriptor 67 isList bool 68 mustBeLast bool 69 } 70 71 // advanceToField advances the context to the next field of current message. 72 // Returns the canonical form of field name. Returns error when the supplied 73 // field doesn't exist in message or the current message descriptor is nil 74 // (meaning scalar field). 75 // 76 // If isJSONName is true, we will assume the given field name is JSON name and 77 // look up the JSON name instead of the field name. 78 func (ctx *parseCtx) advanceToField(fieldName string, isJSONName bool) (string, error) { 79 msgDesc := ctx.curDescriptor 80 if msgDesc == nil { 81 return "", fmt.Errorf("can't advance to field when current descriptor is nil") 82 } 83 var fieldDesc protoreflect.FieldDescriptor 84 if isJSONName { 85 fieldDesc = msgDesc.Fields().ByJSONName(fieldName) 86 } else { 87 fieldDesc = msgDesc.Fields().ByName(protoreflect.Name(fieldName)) 88 } 89 if fieldDesc == nil { 90 return "", fmt.Errorf("field %q does not exist in message %s", fieldName, msgDesc.Name()) 91 } 92 ctx.curDescriptor = fieldDesc.Message() 93 ctx.isList = fieldDesc.IsList() 94 return string(fieldDesc.Name()), nil 95 } 96 97 // mapKeyKindToTokenType defines the mapping between the kind of mapkey 98 // and the expected token type of the token in path string. 99 var mapKeyKindToTokenType = map[protoreflect.Kind]tokenType{ 100 protoreflect.Int32Kind: intLiteral, 101 protoreflect.Int64Kind: intLiteral, 102 protoreflect.Sint32Kind: intLiteral, 103 protoreflect.Sint64Kind: intLiteral, 104 protoreflect.Uint32Kind: intLiteral, 105 protoreflect.Uint64Kind: intLiteral, 106 protoreflect.Sfixed32Kind: intLiteral, 107 protoreflect.Fixed32Kind: intLiteral, 108 protoreflect.Sfixed64Kind: intLiteral, 109 protoreflect.Fixed64Kind: intLiteral, 110 protoreflect.BoolKind: boolLiteral, 111 protoreflect.StringKind: strLiteral, 112 } 113 114 // parseSegment parses a token to a segment string and updates the prase context 115 // accordingly. 116 // 117 // If isJSONName is true, the token value is expected to be JSON name of 118 // a field of a message instead of canonical name. However, the return segment 119 // will always be canonical name. 120 func parseSegment(tok token, isJSONName bool, ctx *parseCtx) (string, error) { 121 switch desc := ctx.curDescriptor; { 122 case ctx.mustBeLast: 123 return "", fmt.Errorf("expected end of string; got token: %q", tok.value) 124 case ctx.isList: 125 // The current segment corresponds to a list field (non-map entry repeated 126 // field). Only star is allowed 127 if tok.typ != star { 128 return "", fmt.Errorf("expected a star following a repeated field; got token: %q", tok.value) 129 } 130 ctx.isList = false 131 return "*", nil 132 133 case desc == nil: 134 return "", fmt.Errorf("scalar field cannot have subfield: %q", tok.value) 135 136 case desc.IsMapEntry(): 137 if tok.typ != star { 138 keyKind := desc.Fields().ByName(protoreflect.Name("key")).Kind() 139 if expectTokenType, found := mapKeyKindToTokenType[keyKind]; !found { 140 return "", fmt.Errorf("unexpected map key kind %s", keyKind) 141 } else if expectTokenType != tok.typ { 142 return "", fmt.Errorf("expected map key kind %s; got token: %q", keyKind, tok.value) 143 } 144 } 145 146 if _, err := ctx.advanceToField("value", false); err != nil { 147 return "", err 148 } 149 return tok.value, nil 150 151 case tok.typ == star: 152 // a star cannot be followed by any subfields if it does not corresponds to 153 // a repeated field 154 ctx.mustBeLast = true 155 return "*", nil 156 157 case tok.typ != strLiteral: 158 return "", fmt.Errorf("expected a field name of type string; got token: %q", tok.value) 159 160 default: 161 return ctx.advanceToField(tok.value, isJSONName) 162 } 163 } 164 165 // tokenizer breaks a path string into tokens(segments) 166 type tokenizer struct { 167 path string 168 delimiter byte 169 pos int 170 } 171 172 // token is a composite of token type and the raw string value. It represents 173 // a segment in the path 174 type token struct { 175 typ tokenType 176 value string 177 } 178 179 // tokenType models different types of segment defined in grammar (see pkg doc). 180 // Note that, quoted string will also be treated as string literal. 181 type tokenType int8 182 183 const ( 184 star tokenType = iota 185 strLiteral 186 boolLiteral 187 intLiteral 188 ) 189 190 // hasMoreTokens tests if there are more tokens available from the path string 191 func (t tokenizer) hasMoreTokens() bool { 192 return t.pos < len(t.path) 193 } 194 195 // nextToken returns the next token in the path string. Always call 196 // hasMoreTokens before calling this function. Otherwise, This function call 197 // will panic with index out of range when there is no more token available. 198 func (t *tokenizer) nextToken() (token, error) { 199 if t.pos > 0 { 200 // if not reading the first token, expecting a delimiter 201 if t.path[t.pos] != t.delimiter { 202 return token{}, fmt.Errorf("expected delimiter: %c; got %c", t.delimiter, t.path[t.pos]) 203 } 204 t.pos++ // swallow the delimiter 205 if t.pos == len(t.path) { 206 return token{}, fmt.Errorf("path can't end with delimiter: %c", t.delimiter) 207 } 208 } 209 210 switch b, pathLen := t.path[t.pos], len(t.path); { 211 case b == '`': 212 t.pos++ // swallow the starting backtick 213 sb := &strings.Builder{} 214 for { 215 nextBacktickRel := strings.IndexRune(t.path[t.pos:], '`') 216 if nextBacktickRel == -1 { 217 sb.WriteString(t.path[t.pos:]) 218 return token{}, fmt.Errorf("a quoted string is never closed; got: %q", sb) 219 } 220 nextBacktickAbs := t.pos + nextBacktickRel 221 sb.WriteString(t.path[t.pos:nextBacktickAbs]) 222 t.pos = nextBacktickAbs + 1 // Swallow the discovered backtick as well 223 if t.pos >= pathLen || t.path[t.pos] != '`' { 224 // Stop if eof or the discovered backtick is not for escaping 225 break 226 } 227 sb.WriteByte('`') 228 t.pos++ // Swallow the escaped backtick 229 } 230 return token{ 231 typ: strLiteral, 232 value: sb.String(), 233 }, nil 234 235 case b == '*': 236 t.pos++ // swallow the star 237 return token{ 238 typ: star, 239 value: "*", 240 }, nil 241 242 // Check if '-' is the last character or look ahead to see if it is followed 243 // by a digit 244 case b == '-' && (t.pos+1 == pathLen || !unicode.IsDigit(rune(t.path[t.pos+1]))): 245 return token{}, fmt.Errorf("expected digit following minus sign for negative numbers; got minus sign only") 246 247 case b == '-' || unicode.IsDigit(rune(b)): 248 start := t.pos 249 t.pos++ // swallow the first digit or minus sign felled through 250 n := strings.IndexFunc(t.path[t.pos:], func(r rune) bool { return !unicode.IsDigit(r) }) 251 if n == -1 { 252 t.pos = pathLen 253 } else { 254 t.pos += n 255 } 256 return token{ 257 typ: intLiteral, 258 value: t.path[start:t.pos], 259 }, nil 260 261 case b == '_' || unicode.IsLetter(rune(b)): 262 start := t.pos 263 t.pos++ // swallow the underscore or first letter 264 n := strings.IndexFunc(t.path[t.pos:], isInvalidStringChar) 265 if n == -1 { 266 t.pos = pathLen 267 } else { 268 t.pos += n 269 } 270 typ, val := strLiteral, t.path[start:t.pos] 271 if val == "true" || val == "false" { 272 typ = boolLiteral 273 } 274 return token{ 275 typ: typ, 276 value: val, 277 }, nil 278 279 default: 280 return token{}, fmt.Errorf("unexpected token: %c", b) 281 } 282 } 283 284 // isInvalidStringChar tells whether the given rune represents an invalid 285 // character in a string literal according to the grammar. 286 func isInvalidStringChar(r rune) bool { 287 return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' 288 }