github.com/erda-project/erda-infra@v1.0.10-0.20240327085753-f3a249292aeb/pkg/transport/http/httprule/parse.go (about)

     1  // Copyright (c) 2021 Terminus, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Reference: https://github.com/grpc-ecosystem/grpc-gateway/blob/v2.3.0/internal/httprule/parse.go
    16  
    17  package httprule
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  )
    23  
    24  // InvalidTemplateError indicates that the path template is not valid.
    25  type InvalidTemplateError struct {
    26  	tmpl string
    27  	msg  string
    28  }
    29  
    30  func (e InvalidTemplateError) Error() string {
    31  	return fmt.Sprintf("%s: %s", e.msg, e.tmpl)
    32  }
    33  
    34  // Parse parses the string representation of path template
    35  func Parse(tmpl string) (Compiler, error) {
    36  	if !strings.HasPrefix(tmpl, "/") {
    37  		return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"}
    38  	}
    39  	tokens, verb := tokenize(tmpl[1:])
    40  
    41  	p := parser{tokens: tokens}
    42  	segs, err := p.topLevelSegments()
    43  	if err != nil {
    44  		return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()}
    45  	}
    46  
    47  	return template{
    48  		segments: segs,
    49  		verb:     verb,
    50  		template: tmpl,
    51  	}, nil
    52  }
    53  
    54  func tokenize(path string) (tokens []string, verb string) {
    55  	if path == "" {
    56  		return []string{eof}, ""
    57  	}
    58  
    59  	const (
    60  		init = iota
    61  		field
    62  		nested
    63  	)
    64  	st := init
    65  	for path != "" {
    66  		var idx int
    67  		switch st {
    68  		case init:
    69  			idx = strings.IndexAny(path, "/{")
    70  		case field:
    71  			idx = strings.IndexAny(path, ".=}")
    72  		case nested:
    73  			idx = strings.IndexAny(path, "/}")
    74  		}
    75  		if idx < 0 {
    76  			tokens = append(tokens, path)
    77  			break
    78  		}
    79  		switch r := path[idx]; r {
    80  		case '/', '.':
    81  		case '{':
    82  			st = field
    83  		case '=':
    84  			st = nested
    85  		case '}':
    86  			st = init
    87  		}
    88  		if idx == 0 {
    89  			tokens = append(tokens, path[idx:idx+1])
    90  		} else {
    91  			tokens = append(tokens, path[:idx], path[idx:idx+1])
    92  		}
    93  		path = path[idx+1:]
    94  	}
    95  
    96  	l := len(tokens)
    97  	// See
    98  	// https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ;
    99  	// although normal and backwards-compat logic here is to use the last index
   100  	// of a colon, if the final segment is a variable followed by a colon, the
   101  	// part following the colon must be a verb. Hence if the previous token is
   102  	// an end var marker, we switch the index we're looking for to Index instead
   103  	// of LastIndex, so that we correctly grab the remaining part of the path as
   104  	// the verb.
   105  	var penultimateTokenIsEndVar bool
   106  	switch l {
   107  	case 0, 1:
   108  		// Not enough to be variable so skip this logic and don't result in an
   109  		// invalid index
   110  	default:
   111  		penultimateTokenIsEndVar = tokens[l-2] == "}"
   112  	}
   113  	t := tokens[l-1]
   114  	var idx int
   115  	if penultimateTokenIsEndVar {
   116  		idx = strings.Index(t, ":")
   117  	} else {
   118  		idx = strings.LastIndex(t, ":")
   119  	}
   120  	if idx == 0 {
   121  		tokens, verb = tokens[:l-1], t[1:]
   122  	} else if idx > 0 {
   123  		tokens[l-1], verb = t[:idx], t[idx+1:]
   124  	}
   125  	tokens = append(tokens, eof)
   126  	return tokens, verb
   127  }
   128  
// parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto.
//
// It works on a token slice and consumes tokens from the front through
// accept.
type parser struct {
	// tokens holds the tokens that have not been consumed yet; accept
	// removes the first element each time it succeeds.
	tokens []string
	// accepted holds the tokens consumed so far, in order; it is used to
	// build error messages.
	accepted []string
}
   134  
   135  // topLevelSegments is the target of this parser.
   136  func (p *parser) topLevelSegments() ([]segment, error) {
   137  	segs, err := p.segments()
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  	if _, err := p.accept(typeEOF); err != nil {
   142  		return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, ""))
   143  	}
   144  	return segs, nil
   145  }
   146  
   147  func (p *parser) segments() ([]segment, error) {
   148  	s, err := p.segment()
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  
   153  	segs := []segment{s}
   154  	for {
   155  		if _, err := p.accept("/"); err != nil {
   156  			return segs, nil
   157  		}
   158  		s, err := p.segment()
   159  		if err != nil {
   160  			return segs, err
   161  		}
   162  		segs = append(segs, s)
   163  	}
   164  }
   165  
   166  func (p *parser) segment() (segment, error) {
   167  	if _, err := p.accept("*"); err == nil {
   168  		return wildcard{}, nil
   169  	}
   170  	if _, err := p.accept("**"); err == nil {
   171  		return deepWildcard{}, nil
   172  	}
   173  	if l, err := p.literal(); err == nil {
   174  		return l, nil
   175  	}
   176  
   177  	v, err := p.variable()
   178  	if err != nil {
   179  		return nil, fmt.Errorf("segment neither wildcards, literal or variable: %v", err)
   180  	}
   181  	return v, err
   182  }
   183  
   184  func (p *parser) literal() (segment, error) {
   185  	lit, err := p.accept(typeLiteral)
   186  	if err != nil {
   187  		return nil, err
   188  	}
   189  	return literal(lit), nil
   190  }
   191  
   192  func (p *parser) variable() (segment, error) {
   193  	if _, err := p.accept("{"); err != nil {
   194  		return nil, err
   195  	}
   196  
   197  	path, err := p.fieldPath()
   198  	if err != nil {
   199  		return nil, err
   200  	}
   201  
   202  	var segs []segment
   203  	if _, err := p.accept("="); err == nil {
   204  		segs, err = p.segments()
   205  		if err != nil {
   206  			return nil, fmt.Errorf("invalid segment in variable %q: %v", path, err)
   207  		}
   208  	} else {
   209  		segs = []segment{wildcard{}}
   210  	}
   211  
   212  	if _, err := p.accept("}"); err != nil {
   213  		return nil, fmt.Errorf("unterminated variable segment: %s", path)
   214  	}
   215  	return variable{
   216  		path:     path,
   217  		segments: segs,
   218  	}, nil
   219  }
   220  
   221  func (p *parser) fieldPath() (string, error) {
   222  	c, err := p.accept(typeIdent)
   223  	if err != nil {
   224  		return "", err
   225  	}
   226  	components := []string{c}
   227  	for {
   228  		if _, err = p.accept("."); err != nil {
   229  			return strings.Join(components, "."), nil
   230  		}
   231  		c, err := p.accept(typeIdent)
   232  		if err != nil {
   233  			return "", fmt.Errorf("invalid field path component: %v", err)
   234  		}
   235  		components = append(components, c)
   236  	}
   237  }
   238  
// A termType is a type of terminal symbols.
type termType string

// These constants define some of valid values of termType.
// They improve readability of parse functions.
//
// You can also use "/", "*", "**", ".", "=", "{" or "}" as valid values;
// accept handles each of those punctuation terms as well.
const (
	// typeIdent matches a token that expectIdent accepts.
	typeIdent   = termType("ident")
	// typeLiteral matches a token that expectPChars accepts.
	typeLiteral = termType("literal")
	// typeEOF matches the eof token.
	typeEOF     = termType("$")
)

const (
	// eof is the terminal symbol which always appears at the end of token sequence.
	eof = "\u0000"
)
   256  
   257  // accept tries to accept a token in "p".
   258  // This function consumes a token and returns it if it matches to the specified "term".
   259  // If it doesn't match, the function does not consume any tokens and return an error.
   260  func (p *parser) accept(term termType) (string, error) {
   261  	t := p.tokens[0]
   262  	switch term {
   263  	case "/", "*", "**", ".", "=", "{", "}":
   264  		if t != string(term) && t != "/" {
   265  			return "", fmt.Errorf("expected %q but got %q", term, t)
   266  		}
   267  	case typeEOF:
   268  		if t != eof {
   269  			return "", fmt.Errorf("expected EOF but got %q", t)
   270  		}
   271  	case typeIdent:
   272  		if err := expectIdent(t); err != nil {
   273  			return "", err
   274  		}
   275  	case typeLiteral:
   276  		if err := expectPChars(t); err != nil {
   277  			return "", err
   278  		}
   279  	default:
   280  		return "", fmt.Errorf("unknown termType %q", term)
   281  	}
   282  	p.tokens = p.tokens[1:]
   283  	p.accepted = append(p.accepted, t)
   284  	return t, nil
   285  }
   286  
   287  // expectPChars determines if "t" consists of only pchars defined in RFC3986.
   288  //
   289  // https://www.ietf.org/rfc/rfc3986.txt, P.49
   290  //
   291  //	pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
   292  //	unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
   293  //	sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
   294  //	              / "*" / "+" / "," / ";" / "="
   295  //	pct-encoded   = "%" HEXDIG HEXDIG
   296  func expectPChars(t string) error {
   297  	const (
   298  		init = iota
   299  		pct1
   300  		pct2
   301  	)
   302  	st := init
   303  	for _, r := range t {
   304  		if st != init {
   305  			if !isHexDigit(r) {
   306  				return fmt.Errorf("invalid hexdigit: %c(%U)", r, r)
   307  			}
   308  			switch st {
   309  			case pct1:
   310  				st = pct2
   311  			case pct2:
   312  				st = init
   313  			}
   314  			continue
   315  		}
   316  
   317  		// unreserved
   318  		switch {
   319  		case 'A' <= r && r <= 'Z':
   320  			continue
   321  		case 'a' <= r && r <= 'z':
   322  			continue
   323  		case '0' <= r && r <= '9':
   324  			continue
   325  		}
   326  		switch r {
   327  		case '-', '.', '_', '~':
   328  			// unreserved
   329  		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
   330  			// sub-delims
   331  		case ':', '@':
   332  			// rest of pchar
   333  		case '%':
   334  			// pct-encoded
   335  			st = pct1
   336  		default:
   337  			return fmt.Errorf("invalid character in path segment: %q(%U)", r, r)
   338  		}
   339  	}
   340  	if st != init {
   341  		return fmt.Errorf("invalid percent-encoding in %q", t)
   342  	}
   343  	return nil
   344  }
   345  
   346  // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*).
   347  func expectIdent(ident string) error {
   348  	if ident == "" {
   349  		return fmt.Errorf("empty identifier")
   350  	}
   351  	for pos, r := range ident {
   352  		switch {
   353  		case '0' <= r && r <= '9':
   354  			if pos == 0 {
   355  				return fmt.Errorf("identifier starting with digit: %s", ident)
   356  			}
   357  			continue
   358  		case 'A' <= r && r <= 'Z':
   359  			continue
   360  		case 'a' <= r && r <= 'z':
   361  			continue
   362  		case r == '_':
   363  			continue
   364  		default:
   365  			return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident)
   366  		}
   367  	}
   368  	return nil
   369  }
   370  
   371  func isHexDigit(r rune) bool {
   372  	switch {
   373  	case '0' <= r && r <= '9':
   374  		return true
   375  	case 'A' <= r && r <= 'F':
   376  		return true
   377  	case 'a' <= r && r <= 'f':
   378  		return true
   379  	}
   380  	return false
   381  }