github.com/grpc-ecosystem/grpc-gateway/v2@v2.19.1/internal/httprule/parse.go (about) 1 package httprule 2 3 import ( 4 "errors" 5 "fmt" 6 "strings" 7 ) 8 9 // InvalidTemplateError indicates that the path template is not valid. 10 type InvalidTemplateError struct { 11 tmpl string 12 msg string 13 } 14 15 func (e InvalidTemplateError) Error() string { 16 return fmt.Sprintf("%s: %s", e.msg, e.tmpl) 17 } 18 19 // Parse parses the string representation of path template 20 func Parse(tmpl string) (Compiler, error) { 21 if !strings.HasPrefix(tmpl, "/") { 22 return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"} 23 } 24 tokens, verb := tokenize(tmpl[1:]) 25 26 p := parser{tokens: tokens} 27 segs, err := p.topLevelSegments() 28 if err != nil { 29 return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()} 30 } 31 32 return template{ 33 segments: segs, 34 verb: verb, 35 template: tmpl, 36 }, nil 37 } 38 39 func tokenize(path string) (tokens []string, verb string) { 40 if path == "" { 41 return []string{eof}, "" 42 } 43 44 const ( 45 init = iota 46 field 47 nested 48 ) 49 st := init 50 for path != "" { 51 var idx int 52 switch st { 53 case init: 54 idx = strings.IndexAny(path, "/{") 55 case field: 56 idx = strings.IndexAny(path, ".=}") 57 case nested: 58 idx = strings.IndexAny(path, "/}") 59 } 60 if idx < 0 { 61 tokens = append(tokens, path) 62 break 63 } 64 switch r := path[idx]; r { 65 case '/', '.': 66 case '{': 67 st = field 68 case '=': 69 st = nested 70 case '}': 71 st = init 72 } 73 if idx == 0 { 74 tokens = append(tokens, path[idx:idx+1]) 75 } else { 76 tokens = append(tokens, path[:idx], path[idx:idx+1]) 77 } 78 path = path[idx+1:] 79 } 80 81 l := len(tokens) 82 // See 83 // https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ; 84 // although normal and backwards-compat logic here is to use the last index 85 // of a colon, if the final segment is a variable followed by a colon, the 86 // part following the colon must be a verb. Hence if the previous token is 87 // an end var marker, we switch the index we're looking for to Index instead 88 // of LastIndex, so that we correctly grab the remaining part of the path as 89 // the verb. 90 var penultimateTokenIsEndVar bool 91 switch l { 92 case 0, 1: 93 // Not enough to be variable so skip this logic and don't result in an 94 // invalid index 95 default: 96 penultimateTokenIsEndVar = tokens[l-2] == "}" 97 } 98 t := tokens[l-1] 99 var idx int 100 if penultimateTokenIsEndVar { 101 idx = strings.Index(t, ":") 102 } else { 103 idx = strings.LastIndex(t, ":") 104 } 105 if idx == 0 { 106 tokens, verb = tokens[:l-1], t[1:] 107 } else if idx > 0 { 108 tokens[l-1], verb = t[:idx], t[idx+1:] 109 } 110 tokens = append(tokens, eof) 111 return tokens, verb 112 } 113 114 // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto. 115 type parser struct { 116 tokens []string 117 accepted []string 118 } 119 120 // topLevelSegments is the target of this parser. 121 func (p *parser) topLevelSegments() ([]segment, error) { 122 if _, err := p.accept(typeEOF); err == nil { 123 p.tokens = p.tokens[:0] 124 return []segment{literal(eof)}, nil 125 } 126 segs, err := p.segments() 127 if err != nil { 128 return nil, err 129 } 130 if _, err := p.accept(typeEOF); err != nil { 131 return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, "")) 132 } 133 return segs, nil 134 } 135 136 func (p *parser) segments() ([]segment, error) { 137 s, err := p.segment() 138 if err != nil { 139 return nil, err 140 } 141 142 segs := []segment{s} 143 for { 144 if _, err := p.accept("/"); err != nil { 145 return segs, nil 146 } 147 s, err := p.segment() 148 if err != nil { 149 return segs, err 150 } 151 segs = append(segs, s) 152 } 153 } 154 155 func (p *parser) segment() (segment, error) { 156 if _, err := p.accept("*"); err == nil { 157 return wildcard{}, nil 158 } 159 if _, err := p.accept("**"); err == nil { 160 return deepWildcard{}, nil 161 } 162 if l, err := p.literal(); err == nil { 163 return l, nil 164 } 165 166 v, err := p.variable() 167 if err != nil { 168 return nil, fmt.Errorf("segment neither wildcards, literal or variable: %w", err) 169 } 170 return v, nil 171 } 172 173 func (p *parser) literal() (segment, error) { 174 lit, err := p.accept(typeLiteral) 175 if err != nil { 176 return nil, err 177 } 178 return literal(lit), nil 179 } 180 181 func (p *parser) variable() (segment, error) { 182 if _, err := p.accept("{"); err != nil { 183 return nil, err 184 } 185 186 path, err := p.fieldPath() 187 if err != nil { 188 return nil, err 189 } 190 191 var segs []segment 192 if _, err := p.accept("="); err == nil { 193 segs, err = p.segments() 194 if err != nil { 195 return nil, fmt.Errorf("invalid segment in variable %q: %w", path, err) 196 } 197 } else { 198 segs = []segment{wildcard{}} 199 } 200 201 if _, err := p.accept("}"); err != nil { 202 return nil, fmt.Errorf("unterminated variable segment: %s", path) 203 } 204 return variable{ 205 path: path, 206 segments: segs, 207 }, nil 208 } 209 210 func (p *parser) fieldPath() (string, error) { 211 c, err := p.accept(typeIdent) 212 if err != nil { 213 return "", err 214 } 215 components := []string{c} 216 for { 217 if _, err := p.accept("."); err != nil { 218 return strings.Join(components, "."), nil 219 } 220 c, err := p.accept(typeIdent) 221 if err != nil { 222 return "", fmt.Errorf("invalid field path component: %w", err) 223 } 224 components = append(components, c) 225 } 226 } 227 228 // A termType is a type of terminal symbols. 229 type termType string 230 231 // These constants define some of valid values of termType. 232 // They improve readability of parse functions. 233 // 234 // You can also use "/", "*", "**", "." or "=" as valid values. 235 const ( 236 typeIdent = termType("ident") 237 typeLiteral = termType("literal") 238 typeEOF = termType("$") 239 ) 240 241 // eof is the terminal symbol which always appears at the end of token sequence. 242 const eof = "\u0000" 243 244 // accept tries to accept a token in "p". 245 // This function consumes a token and returns it if it matches to the specified "term". 246 // If it doesn't match, the function does not consume any tokens and return an error. 247 func (p *parser) accept(term termType) (string, error) { 248 t := p.tokens[0] 249 switch term { 250 case "/", "*", "**", ".", "=", "{", "}": 251 if t != string(term) && t != "/" { 252 return "", fmt.Errorf("expected %q but got %q", term, t) 253 } 254 case typeEOF: 255 if t != eof { 256 return "", fmt.Errorf("expected EOF but got %q", t) 257 } 258 case typeIdent: 259 if err := expectIdent(t); err != nil { 260 return "", err 261 } 262 case typeLiteral: 263 if err := expectPChars(t); err != nil { 264 return "", err 265 } 266 default: 267 return "", fmt.Errorf("unknown termType %q", term) 268 } 269 p.tokens = p.tokens[1:] 270 p.accepted = append(p.accepted, t) 271 return t, nil 272 } 273 274 // expectPChars determines if "t" consists of only pchars defined in RFC3986. 275 // 276 // https://www.ietf.org/rfc/rfc3986.txt, P.49 277 // 278 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 279 // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 280 // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 281 // / "*" / "+" / "," / ";" / "=" 282 // pct-encoded = "%" HEXDIG HEXDIG 283 func expectPChars(t string) error { 284 const ( 285 init = iota 286 pct1 287 pct2 288 ) 289 st := init 290 for _, r := range t { 291 if st != init { 292 if !isHexDigit(r) { 293 return fmt.Errorf("invalid hexdigit: %c(%U)", r, r) 294 } 295 switch st { 296 case pct1: 297 st = pct2 298 case pct2: 299 st = init 300 } 301 continue 302 } 303 304 // unreserved 305 switch { 306 case 'A' <= r && r <= 'Z': 307 continue 308 case 'a' <= r && r <= 'z': 309 continue 310 case '0' <= r && r <= '9': 311 continue 312 } 313 switch r { 314 case '-', '.', '_', '~': 315 // unreserved 316 case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': 317 // sub-delims 318 case ':', '@': 319 // rest of pchar 320 case '%': 321 // pct-encoded 322 st = pct1 323 default: 324 return fmt.Errorf("invalid character in path segment: %q(%U)", r, r) 325 } 326 } 327 if st != init { 328 return fmt.Errorf("invalid percent-encoding in %q", t) 329 } 330 return nil 331 } 332 333 // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*). 334 func expectIdent(ident string) error { 335 if ident == "" { 336 return errors.New("empty identifier") 337 } 338 for pos, r := range ident { 339 switch { 340 case '0' <= r && r <= '9': 341 if pos == 0 { 342 return fmt.Errorf("identifier starting with digit: %s", ident) 343 } 344 continue 345 case 'A' <= r && r <= 'Z': 346 continue 347 case 'a' <= r && r <= 'z': 348 continue 349 case r == '_': 350 continue 351 default: 352 return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident) 353 } 354 } 355 return nil 356 } 357 358 func isHexDigit(r rune) bool { 359 switch { 360 case '0' <= r && r <= '9': 361 return true 362 case 'A' <= r && r <= 'F': 363 return true 364 case 'a' <= r && r <= 'f': 365 return true 366 } 367 return false 368 }