go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/proto/mask/parse.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mask
    16  
import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"

	"google.golang.org/protobuf/reflect/protoreflect"
)
    24  
// path models a parsed path: an ordered slice of segments, one per token of
// the raw path string.
type path []string
    27  
// pathDelimiter is the character that separates adjacent segments in a raw
// path string; parsePathWithContext hands it to the tokenizer.
const pathDelimiter = '.'
    29  
    30  // parsePath parses a path string to a slice of segments (See grammar in pkg
    31  // doc).
    32  //
    33  // If isJSONName is true, parsing the field name using JSON field name instead
    34  // of its canonical form. However, the result segments in path will use
    35  // canonical field name.
    36  func parsePath(rawPath string, descriptor protoreflect.MessageDescriptor, isJSONName bool) (path, error) {
    37  	ctx := &parseCtx{
    38  		curDescriptor: descriptor,
    39  	}
    40  	return parsePathWithContext(rawPath, ctx, isJSONName)
    41  }
    42  
    43  func parsePathWithContext(rawPath string, ctx *parseCtx, isJSONName bool) (path, error) {
    44  	t := &tokenizer{
    45  		path:      rawPath,
    46  		delimiter: pathDelimiter,
    47  	}
    48  	ret := path{}
    49  	for t.hasMoreTokens() {
    50  		if tok, err := t.nextToken(); err != nil {
    51  			return nil, err
    52  		} else {
    53  			seg, err := parseSegment(tok, isJSONName, ctx)
    54  			if err != nil {
    55  				return nil, err
    56  			}
    57  			ret = append(ret, seg)
    58  		}
    59  	}
    60  
    61  	return ret, nil
    62  }
    63  
// parseCtx defines the context carried from one segment to the next during
// path parsing.
//
//   - curDescriptor is the message the next segment is resolved against; it is
//     nil once a scalar field has been reached.
//   - isList is set when the field last advanced to is a list; parseSegment
//     then accepts only a star and clears the flag.
//   - mustBeLast is set by parseSegment after a star that does not follow a
//     repeated field; any further token is rejected.
type parseCtx struct {
	curDescriptor protoreflect.MessageDescriptor
	isList        bool
	mustBeLast    bool
}
    70  
    71  // advanceToField advances the context to the next field of current message.
    72  // Returns the canonical form of field name. Returns error when the supplied
    73  // field doesn't exist in message or the current message descriptor is nil
    74  // (meaning scalar field).
    75  //
    76  // If isJSONName is true, we will assume the given field name is JSON name and
    77  // look up the JSON name instead of the field name.
    78  func (ctx *parseCtx) advanceToField(fieldName string, isJSONName bool) (string, error) {
    79  	msgDesc := ctx.curDescriptor
    80  	if msgDesc == nil {
    81  		return "", fmt.Errorf("can't advance to field when current descriptor is nil")
    82  	}
    83  	var fieldDesc protoreflect.FieldDescriptor
    84  	if isJSONName {
    85  		fieldDesc = msgDesc.Fields().ByJSONName(fieldName)
    86  	} else {
    87  		fieldDesc = msgDesc.Fields().ByName(protoreflect.Name(fieldName))
    88  	}
    89  	if fieldDesc == nil {
    90  		return "", fmt.Errorf("field %q does not exist in message %s", fieldName, msgDesc.Name())
    91  	}
    92  	ctx.curDescriptor = fieldDesc.Message()
    93  	ctx.isList = fieldDesc.IsList()
    94  	return string(fieldDesc.Name()), nil
    95  }
    96  
// mapKeyKindToTokenType maps the kind of a map field's key to the token type
// that the corresponding token in the path string must have. parseSegment
// reports a key kind that is absent from this table as an unexpected map key
// kind.
var mapKeyKindToTokenType = map[protoreflect.Kind]tokenType{
	protoreflect.Int32Kind:    intLiteral,
	protoreflect.Int64Kind:    intLiteral,
	protoreflect.Sint32Kind:   intLiteral,
	protoreflect.Sint64Kind:   intLiteral,
	protoreflect.Uint32Kind:   intLiteral,
	protoreflect.Uint64Kind:   intLiteral,
	protoreflect.Sfixed32Kind: intLiteral,
	protoreflect.Fixed32Kind:  intLiteral,
	protoreflect.Sfixed64Kind: intLiteral,
	protoreflect.Fixed64Kind:  intLiteral,
	protoreflect.BoolKind:     boolLiteral,
	protoreflect.StringKind:   strLiteral,
}
   113  
   114  // parseSegment parses a token to a segment string and updates the prase context
   115  // accordingly.
   116  //
   117  // If isJSONName is true, the token value is expected to be JSON name of
   118  // a field of a message instead of canonical name. However, the return segment
   119  // will always be canonical name.
   120  func parseSegment(tok token, isJSONName bool, ctx *parseCtx) (string, error) {
   121  	switch desc := ctx.curDescriptor; {
   122  	case ctx.mustBeLast:
   123  		return "", fmt.Errorf("expected end of string; got token: %q", tok.value)
   124  	case ctx.isList:
   125  		// The current segment corresponds to a list field (non-map entry repeated
   126  		// field). Only star is allowed
   127  		if tok.typ != star {
   128  			return "", fmt.Errorf("expected a star following a repeated field; got token: %q", tok.value)
   129  		}
   130  		ctx.isList = false
   131  		return "*", nil
   132  
   133  	case desc == nil:
   134  		return "", fmt.Errorf("scalar field cannot have subfield: %q", tok.value)
   135  
   136  	case desc.IsMapEntry():
   137  		if tok.typ != star {
   138  			keyKind := desc.Fields().ByName(protoreflect.Name("key")).Kind()
   139  			if expectTokenType, found := mapKeyKindToTokenType[keyKind]; !found {
   140  				return "", fmt.Errorf("unexpected map key kind %s", keyKind)
   141  			} else if expectTokenType != tok.typ {
   142  				return "", fmt.Errorf("expected map key kind %s; got token: %q", keyKind, tok.value)
   143  			}
   144  		}
   145  
   146  		if _, err := ctx.advanceToField("value", false); err != nil {
   147  			return "", err
   148  		}
   149  		return tok.value, nil
   150  
   151  	case tok.typ == star:
   152  		// a star cannot be followed by any subfields if it does not corresponds to
   153  		// a repeated field
   154  		ctx.mustBeLast = true
   155  		return "*", nil
   156  
   157  	case tok.typ != strLiteral:
   158  		return "", fmt.Errorf("expected a field name of type string; got token: %q", tok.value)
   159  
   160  	default:
   161  		return ctx.advanceToField(tok.value, isJSONName)
   162  	}
   163  }
   164  
// tokenizer breaks a path string into tokens (segments).
//
//   - path is the raw path string being tokenized.
//   - delimiter is the byte expected between two adjacent tokens.
//   - pos is the byte offset in path of the next unread byte.
type tokenizer struct {
	path      string
	delimiter byte
	pos       int
}
   171  
// token is a composite of a token type and a string value. It represents one
// segment in the path. For a backtick-quoted string, value holds the content
// without the surrounding backticks and with each doubled backtick reduced to
// a single one; for every other token it is the raw text from the path.
type token struct {
	typ   tokenType
	value string
}
   178  
// tokenType models the different types of segment defined in the grammar (see
// pkg doc). Note that a quoted string is also treated as a string literal.
type tokenType int8

const (
	// star is the type of a lone "*" token.
	star tokenType = iota
	// strLiteral is the type of an unquoted name and of a backtick-quoted
	// string.
	strLiteral
	// boolLiteral is the type of an unquoted "true" or "false".
	boolLiteral
	// intLiteral is the type of an optional minus sign followed by digits.
	intLiteral
)
   189  
   190  // hasMoreTokens tests if there are more tokens available from the path string
   191  func (t tokenizer) hasMoreTokens() bool {
   192  	return t.pos < len(t.path)
   193  }
   194  
   195  // nextToken returns the next token in the path string. Always call
   196  // hasMoreTokens before calling this function. Otherwise, This function call
   197  // will panic with index out of range when there is no more token available.
   198  func (t *tokenizer) nextToken() (token, error) {
   199  	if t.pos > 0 {
   200  		// if not reading the first token, expecting a delimiter
   201  		if t.path[t.pos] != t.delimiter {
   202  			return token{}, fmt.Errorf("expected delimiter: %c; got %c", t.delimiter, t.path[t.pos])
   203  		}
   204  		t.pos++ // swallow the delimiter
   205  		if t.pos == len(t.path) {
   206  			return token{}, fmt.Errorf("path can't end with delimiter: %c", t.delimiter)
   207  		}
   208  	}
   209  
   210  	switch b, pathLen := t.path[t.pos], len(t.path); {
   211  	case b == '`':
   212  		t.pos++ // swallow the starting backtick
   213  		sb := &strings.Builder{}
   214  		for {
   215  			nextBacktickRel := strings.IndexRune(t.path[t.pos:], '`')
   216  			if nextBacktickRel == -1 {
   217  				sb.WriteString(t.path[t.pos:])
   218  				return token{}, fmt.Errorf("a quoted string is never closed; got: %q", sb)
   219  			}
   220  			nextBacktickAbs := t.pos + nextBacktickRel
   221  			sb.WriteString(t.path[t.pos:nextBacktickAbs])
   222  			t.pos = nextBacktickAbs + 1 // Swallow the discovered backtick as well
   223  			if t.pos >= pathLen || t.path[t.pos] != '`' {
   224  				// Stop if eof or the discovered backtick is not for escaping
   225  				break
   226  			}
   227  			sb.WriteByte('`')
   228  			t.pos++ // Swallow the escaped backtick
   229  		}
   230  		return token{
   231  			typ:   strLiteral,
   232  			value: sb.String(),
   233  		}, nil
   234  
   235  	case b == '*':
   236  		t.pos++ // swallow the star
   237  		return token{
   238  			typ:   star,
   239  			value: "*",
   240  		}, nil
   241  
   242  	// Check if '-' is the last character or look ahead to see if it is followed
   243  	// by a digit
   244  	case b == '-' && (t.pos+1 == pathLen || !unicode.IsDigit(rune(t.path[t.pos+1]))):
   245  		return token{}, fmt.Errorf("expected digit following minus sign for negative numbers; got minus sign only")
   246  
   247  	case b == '-' || unicode.IsDigit(rune(b)):
   248  		start := t.pos
   249  		t.pos++ // swallow the first digit or minus sign felled through
   250  		n := strings.IndexFunc(t.path[t.pos:], func(r rune) bool { return !unicode.IsDigit(r) })
   251  		if n == -1 {
   252  			t.pos = pathLen
   253  		} else {
   254  			t.pos += n
   255  		}
   256  		return token{
   257  			typ:   intLiteral,
   258  			value: t.path[start:t.pos],
   259  		}, nil
   260  
   261  	case b == '_' || unicode.IsLetter(rune(b)):
   262  		start := t.pos
   263  		t.pos++ // swallow the underscore or first letter
   264  		n := strings.IndexFunc(t.path[t.pos:], isInvalidStringChar)
   265  		if n == -1 {
   266  			t.pos = pathLen
   267  		} else {
   268  			t.pos += n
   269  		}
   270  		typ, val := strLiteral, t.path[start:t.pos]
   271  		if val == "true" || val == "false" {
   272  			typ = boolLiteral
   273  		}
   274  		return token{
   275  			typ:   typ,
   276  			value: val,
   277  		}, nil
   278  
   279  	default:
   280  		return token{}, fmt.Errorf("unexpected token: %c", b)
   281  	}
   282  }
   283  
   284  // isInvalidStringChar tells whether the given rune represents an invalid
   285  // character in a string literal according to the grammar.
   286  func isInvalidStringChar(r rune) bool {
   287  	return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_'
   288  }