github.com/erda-project/erda-infra@v1.0.10-0.20240327085753-f3a249292aeb/pkg/transport/http/httprule/parse.go (about) 1 // Copyright (c) 2021 Terminus, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Reference: https://github.com/grpc-ecosystem/grpc-gateway/blob/v2.3.0/internal/httprule/parse.go 16 17 package httprule 18 19 import ( 20 "fmt" 21 "strings" 22 ) 23 24 // InvalidTemplateError indicates that the path template is not valid. 25 type InvalidTemplateError struct { 26 tmpl string 27 msg string 28 } 29 30 func (e InvalidTemplateError) Error() string { 31 return fmt.Sprintf("%s: %s", e.msg, e.tmpl) 32 } 33 34 // Parse parses the string representation of path template 35 func Parse(tmpl string) (Compiler, error) { 36 if !strings.HasPrefix(tmpl, "/") { 37 return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"} 38 } 39 tokens, verb := tokenize(tmpl[1:]) 40 41 p := parser{tokens: tokens} 42 segs, err := p.topLevelSegments() 43 if err != nil { 44 return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()} 45 } 46 47 return template{ 48 segments: segs, 49 verb: verb, 50 template: tmpl, 51 }, nil 52 } 53 54 func tokenize(path string) (tokens []string, verb string) { 55 if path == "" { 56 return []string{eof}, "" 57 } 58 59 const ( 60 init = iota 61 field 62 nested 63 ) 64 st := init 65 for path != "" { 66 var idx int 67 switch st { 68 case init: 69 idx = strings.IndexAny(path, "/{") 70 case field: 71 
idx = strings.IndexAny(path, ".=}") 72 case nested: 73 idx = strings.IndexAny(path, "/}") 74 } 75 if idx < 0 { 76 tokens = append(tokens, path) 77 break 78 } 79 switch r := path[idx]; r { 80 case '/', '.': 81 case '{': 82 st = field 83 case '=': 84 st = nested 85 case '}': 86 st = init 87 } 88 if idx == 0 { 89 tokens = append(tokens, path[idx:idx+1]) 90 } else { 91 tokens = append(tokens, path[:idx], path[idx:idx+1]) 92 } 93 path = path[idx+1:] 94 } 95 96 l := len(tokens) 97 // See 98 // https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ; 99 // although normal and backwards-compat logic here is to use the last index 100 // of a colon, if the final segment is a variable followed by a colon, the 101 // part following the colon must be a verb. Hence if the previous token is 102 // an end var marker, we switch the index we're looking for to Index instead 103 // of LastIndex, so that we correctly grab the remaining part of the path as 104 // the verb. 105 var penultimateTokenIsEndVar bool 106 switch l { 107 case 0, 1: 108 // Not enough to be variable so skip this logic and don't result in an 109 // invalid index 110 default: 111 penultimateTokenIsEndVar = tokens[l-2] == "}" 112 } 113 t := tokens[l-1] 114 var idx int 115 if penultimateTokenIsEndVar { 116 idx = strings.Index(t, ":") 117 } else { 118 idx = strings.LastIndex(t, ":") 119 } 120 if idx == 0 { 121 tokens, verb = tokens[:l-1], t[1:] 122 } else if idx > 0 { 123 tokens[l-1], verb = t[:idx], t[idx+1:] 124 } 125 tokens = append(tokens, eof) 126 return tokens, verb 127 } 128 129 // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto. 130 type parser struct { 131 tokens []string 132 accepted []string 133 } 134 135 // topLevelSegments is the target of this parser. 
136 func (p *parser) topLevelSegments() ([]segment, error) { 137 segs, err := p.segments() 138 if err != nil { 139 return nil, err 140 } 141 if _, err := p.accept(typeEOF); err != nil { 142 return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, "")) 143 } 144 return segs, nil 145 } 146 147 func (p *parser) segments() ([]segment, error) { 148 s, err := p.segment() 149 if err != nil { 150 return nil, err 151 } 152 153 segs := []segment{s} 154 for { 155 if _, err := p.accept("/"); err != nil { 156 return segs, nil 157 } 158 s, err := p.segment() 159 if err != nil { 160 return segs, err 161 } 162 segs = append(segs, s) 163 } 164 } 165 166 func (p *parser) segment() (segment, error) { 167 if _, err := p.accept("*"); err == nil { 168 return wildcard{}, nil 169 } 170 if _, err := p.accept("**"); err == nil { 171 return deepWildcard{}, nil 172 } 173 if l, err := p.literal(); err == nil { 174 return l, nil 175 } 176 177 v, err := p.variable() 178 if err != nil { 179 return nil, fmt.Errorf("segment neither wildcards, literal or variable: %v", err) 180 } 181 return v, err 182 } 183 184 func (p *parser) literal() (segment, error) { 185 lit, err := p.accept(typeLiteral) 186 if err != nil { 187 return nil, err 188 } 189 return literal(lit), nil 190 } 191 192 func (p *parser) variable() (segment, error) { 193 if _, err := p.accept("{"); err != nil { 194 return nil, err 195 } 196 197 path, err := p.fieldPath() 198 if err != nil { 199 return nil, err 200 } 201 202 var segs []segment 203 if _, err := p.accept("="); err == nil { 204 segs, err = p.segments() 205 if err != nil { 206 return nil, fmt.Errorf("invalid segment in variable %q: %v", path, err) 207 } 208 } else { 209 segs = []segment{wildcard{}} 210 } 211 212 if _, err := p.accept("}"); err != nil { 213 return nil, fmt.Errorf("unterminated variable segment: %s", path) 214 } 215 return variable{ 216 path: path, 217 segments: segs, 218 }, nil 219 } 220 221 func (p *parser) 
fieldPath() (string, error) { 222 c, err := p.accept(typeIdent) 223 if err != nil { 224 return "", err 225 } 226 components := []string{c} 227 for { 228 if _, err = p.accept("."); err != nil { 229 return strings.Join(components, "."), nil 230 } 231 c, err := p.accept(typeIdent) 232 if err != nil { 233 return "", fmt.Errorf("invalid field path component: %v", err) 234 } 235 components = append(components, c) 236 } 237 } 238 239 // A termType is a type of terminal symbols. 240 type termType string 241 242 // These constants define some of valid values of termType. 243 // They improve readability of parse functions. 244 // 245 // You can also use "/", "*", "**", "." or "=" as valid values. 246 const ( 247 typeIdent = termType("ident") 248 typeLiteral = termType("literal") 249 typeEOF = termType("$") 250 ) 251 252 const ( 253 // eof is the terminal symbol which always appears at the end of token sequence. 254 eof = "\u0000" 255 ) 256 257 // accept tries to accept a token in "p". 258 // This function consumes a token and returns it if it matches to the specified "term". 259 // If it doesn't match, the function does not consume any tokens and return an error. 260 func (p *parser) accept(term termType) (string, error) { 261 t := p.tokens[0] 262 switch term { 263 case "/", "*", "**", ".", "=", "{", "}": 264 if t != string(term) && t != "/" { 265 return "", fmt.Errorf("expected %q but got %q", term, t) 266 } 267 case typeEOF: 268 if t != eof { 269 return "", fmt.Errorf("expected EOF but got %q", t) 270 } 271 case typeIdent: 272 if err := expectIdent(t); err != nil { 273 return "", err 274 } 275 case typeLiteral: 276 if err := expectPChars(t); err != nil { 277 return "", err 278 } 279 default: 280 return "", fmt.Errorf("unknown termType %q", term) 281 } 282 p.tokens = p.tokens[1:] 283 p.accepted = append(p.accepted, t) 284 return t, nil 285 } 286 287 // expectPChars determines if "t" consists of only pchars defined in RFC3986. 
//
// https://www.ietf.org/rfc/rfc3986.txt, P.49
//
//	pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
//	unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
//	sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
//	              / "*" / "+" / "," / ";" / "="
//	pct-encoded   = "%" HEXDIG HEXDIG
//
// A nil result means every rune of t is a pchar and every "%" is followed by
// exactly two hex digits. The empty string is accepted.
func expectPChars(t string) error {
	// hexLeft is the number of hex digits still owed after the latest '%'.
	hexLeft := 0
	for _, r := range t {
		if hexLeft > 0 {
			if !isHexDigit(r) {
				return fmt.Errorf("invalid hexdigit: %c(%U)", r, r)
			}
			hexLeft--
			continue
		}

		switch r {
		case '%':
			// pct-encoded: two hex digits must follow.
			hexLeft = 2
		case '-', '.', '_', '~', // unreserved punctuation
			'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', // sub-delims
			':', '@': // rest of pchar
		default:
			// Anything else must be ALPHA or DIGIT.
			alnum := ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || ('0' <= r && r <= '9')
			if !alnum {
				return fmt.Errorf("invalid character in path segment: %q(%U)", r, r)
			}
		}
	}
	if hexLeft != 0 {
		// The string ended in the middle of a percent-encoded triplet.
		return fmt.Errorf("invalid percent-encoding in %q", t)
	}
	return nil
}

// expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*).
//
// It returns an error for the empty string, for an identifier whose first
// character is a digit, and for any character other than ASCII letters,
// ASCII digits and "_".
func expectIdent(ident string) error {
	if len(ident) == 0 {
		return fmt.Errorf("empty identifier")
	}
	for i, ch := range ident {
		letterOrUnderscore := ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_'
		if letterOrUnderscore {
			continue
		}
		if ch < '0' || ch > '9' {
			return fmt.Errorf("invalid character %q(%U) in identifier: %s", ch, ch, ident)
		}
		// ch is a digit, which is only allowed after the first position.
		if i == 0 {
			return fmt.Errorf("identifier starting with digit: %s", ident)
		}
	}
	return nil
}

// isHexDigit reports whether r is an ASCII hexadecimal digit (0-9, A-F, a-f).
func isHexDigit(r rune) bool {
	return ('0' <= r && r <= '9') ||
		('a' <= r && r <= 'f') ||
		('A' <= r && r <= 'F')
}