trpc.group/trpc-go/trpc-go@v1.0.3/internal/httprule/parse.go (about) 1 // 2 // 3 // Tencent is pleased to support the open source community by making tRPC available. 4 // 5 // Copyright (C) 2023 THL A29 Limited, a Tencent company. 6 // All rights reserved. 7 // 8 // If you have downloaded a copy of the tRPC source code from Tencent, 9 // please note that tRPC source code is licensed under the Apache 2.0 License, 10 // A copy of the Apache 2.0 License is included in this file. 11 // 12 // 13 14 package httprule 15 16 import ( 17 "bytes" 18 "errors" 19 "fmt" 20 "strings" 21 ) 22 23 const ( 24 invalidChar = byte(0) 25 ) 26 27 var ( 28 errParserInternal = errors.New("parser internal error") 29 errEmptyLiteral = errors.New("empty literal is not allowed") 30 errInitialCharAlpha = errors.New("initial char of identifier not alpha") 31 errEmptyIdent = errors.New("empty identifier") 32 errNestedVar = errors.New("nested variables are not allowed") 33 errDeepWildcard = errors.New("deep wildcard must be the last segment") 34 errDupFieldPath = errors.New("dup field path") 35 errLeadingSlash = errors.New("leading slash required") 36 ) 37 38 // parser is the template parser. 39 type parser struct { 40 urlPath string // the complete httprule URL path. 41 curr int // current pointer position. 42 } 43 44 // Parse parses the httprule URL path into template. 45 func Parse(urlPath string) (*PathTemplate, error) { 46 p := &parser{ 47 urlPath: urlPath, 48 } 49 50 tpl, err := p.parse() 51 if err != nil { 52 return nil, fmt.Errorf("failed to parse url path %s to template: %w, curr: %d", urlPath, err, p.curr) 53 } 54 55 return tpl, nil 56 } 57 58 // parse begins parsing. 59 func (p *parser) parse() (*PathTemplate, error) { 60 // should start with '/'. 61 if err := p.consume('/'); err != nil { 62 return nil, err 63 } 64 65 // parse segments. 66 segments, err := p.parseSegments() 67 if err != nil { 68 return nil, err 69 } 70 // parse verb. 
71 var verb string 72 // If the last segment is of type literal, then verb has already been included. 73 // Find the last position of ':' in the literal. 74 lastSegment := segments[len(segments)-1] 75 if lastSegment.kind() == kindLiteral { 76 s := lastSegment.String() 77 idx := strings.LastIndex(s, ":") 78 if idx > 0 { 79 verb = s[idx+1:] 80 segments[len(segments)-1] = literal(s[:idx]) 81 } 82 } else { 83 if err := p.consume(':'); err == nil { 84 verb, err = p.parseVerb() 85 if err != nil { 86 return nil, err 87 } 88 } 89 } 90 91 // check whether parsing is completed. 92 if !p.done() { 93 return nil, errParserInternal 94 } 95 96 // validate. 97 tpl := &PathTemplate{ 98 segments: segments, 99 verb: verb, 100 } 101 if err := p.validate(tpl); err != nil { 102 return nil, err 103 } 104 105 return tpl, nil 106 } 107 108 // validate validates the template: 109 // 1. whether has nested variables 110 // 2. whether ** is the last segment 111 // 3. whether exists duplicate variable names 112 func (p *parser) validate(tpl *PathTemplate) error { 113 m := make(map[string]bool) // save duplicate variable names 114 115 for i, segment := range tpl.segments { 116 // If it is of type variable, first check whether it is duplicated, 117 // then check its nested segments: 118 // 1. whether has nested variables 119 // 2. if i != len(tpl.segments) - 1, then nested variables should not have ** 120 // 3. if i == len(tpl.segments) - 1, then ** has to be the last nested variable 121 if segment.kind() == kindVariable { 122 // check duplication 123 s := strings.Join(segment.fieldPath(), ".") 124 if m[s] { 125 return errDupFieldPath 126 } 127 m[s] = true 128 129 // check nested segments. 130 nestedSegments := segment.nestedSegments() 131 for j, nestedSegment := range nestedSegments { 132 // nested segment is of kind variable. 133 if nestedSegment.kind() == kindVariable { 134 return errNestedVar 135 } 136 137 // If i != len(tpl.segments) - 1, then nested variables should not have **. 
138 if i != len(tpl.segments)-1 && nestedSegment.kind() == kindDeepWildcard { 139 return errDeepWildcard 140 } 141 142 // If i == len(tpl.segments) - 1, then ** has to be the last nested variable. 143 if i == len(tpl.segments)-1 && j != len(nestedSegments)-1 && 144 nestedSegment.kind() == kindDeepWildcard { 145 return errDeepWildcard 146 } 147 } 148 } 149 150 // It is illegal if ** does not appear as the last segment. 151 if i != len(tpl.segments)-1 && segment.kind() == kindDeepWildcard { 152 return errDeepWildcard 153 } 154 } 155 156 return nil 157 } 158 159 // parseSegments parses segments. 160 func (p *parser) parseSegments() ([]segment, error) { 161 // at lease has one segment. 162 seg, err := p.parseSegment() 163 if err != nil { 164 return nil, err 165 } 166 167 result := []segment{seg} 168 169 if err := p.consume('/'); err == nil { 170 // parse segments recursively. 171 segs, err := p.parseSegments() 172 if err != nil { 173 return nil, err 174 } 175 result = append(result, segs...) 176 } 177 178 return result, nil 179 } 180 181 // parseVerb parses verb. 182 func (p *parser) parseVerb() (string, error) { 183 return p.parseLiteral() 184 } 185 186 // parseSegment parses a single segment. 187 func (p *parser) parseSegment() (segment, error) { 188 switch p.currentChar() { 189 case invalidChar: 190 return nil, errParserInternal 191 case '*': 192 if p.peekN(1) == '*' { 193 p.curr++ 194 p.curr++ 195 return deepWildcard{}, nil 196 } 197 p.curr++ 198 return wildcard{}, nil 199 case '{': 200 return p.parseVariableSegment() 201 default: 202 return p.parseLiteralSegment() 203 } 204 } 205 206 // parseLiteral parses literal type. 207 // https://www.ietf.org/rfc/rfc3986.txt, P.49 208 // 209 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 210 // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 211 // sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" 212 // / "*" / "+" / "," / ";" / "=" 213 // pct-encoded = "%" HEXDIG HEXDIG 214 func (p *parser) parseLiteral() (string, error) { 215 lit := bytes.Buffer{} 216 217 for { 218 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 219 if isUnreserved(rune(p.currentChar())) || isSubDelims(rune(p.currentChar())) || 220 p.currentChar() == '@' || p.currentChar() == ':' { 221 lit.WriteByte(p.currentChar()) 222 p.curr++ 223 continue 224 } else if isPCTEncoded(rune(p.currentChar()), rune(p.peekN(1)), rune(p.peekN(2))) { 225 lit.WriteByte(p.currentChar()) 226 p.curr++ 227 lit.WriteByte(p.currentChar()) 228 p.curr++ 229 lit.WriteByte(p.currentChar()) 230 p.curr++ 231 continue 232 } else { 233 break 234 } 235 } 236 237 // empty literal. 238 if lit.Len() == 0 { 239 return "", errEmptyLiteral 240 } 241 242 return lit.String(), nil 243 } 244 245 // parseLiteralSegment parses literal segment. 246 func (p *parser) parseLiteralSegment() (segment, error) { 247 lit, err := p.parseLiteral() 248 if err != nil { 249 return nil, err 250 } 251 return literal(lit), nil 252 } 253 254 // parseVariableSegment parses variable segment. 255 func (p *parser) parseVariableSegment() (segment, error) { 256 var v variable 257 258 // variable must start with '{'. 259 if err := p.consume('{'); err != nil { 260 return nil, err 261 } 262 263 // parse fieldPath. 264 fieldPath, err := p.parseFieldPath() 265 if err != nil { 266 return nil, err 267 } 268 v.fp = fieldPath 269 270 // check whether has segments. 271 if err := p.consume('='); err == nil { 272 segments, err := p.parseSegments() 273 if err != nil { 274 return nil, err 275 } 276 v.segments = segments 277 } else { // no segments, defaults to *. 278 v.segments = []segment{wildcard{}} 279 } 280 281 // variable must end with '}'. 282 if err := p.consume('}'); err != nil { 283 return nil, err 284 } 285 286 return v, nil 287 } 288 289 // parseFieldPath parses field path. 
290 func (p *parser) parseFieldPath() ([]string, error) { 291 // at least has one ident. 292 ident, err := p.parseIdent() 293 if err != nil { 294 return nil, err 295 } 296 297 result := []string{ident} 298 299 if err := p.consume('.'); err == nil { 300 // parse fieldPath recursively. 301 fp, err := p.parseFieldPath() 302 if err != nil { 303 return nil, err 304 } 305 result = append(result, fp...) 306 } 307 return result, nil 308 } 309 310 // parseIdent parses ident, the valid format of ident is ([[:alpha:]_][[:alphanum:]_]*). 311 func (p *parser) parseIdent() (string, error) { 312 ident := bytes.Buffer{} 313 314 for { 315 if ident.Len() == 0 && !isAlpha(rune(p.currentChar())) { 316 return "", errInitialCharAlpha 317 } 318 if isAlpha(rune(p.currentChar())) || isDigit(rune(p.currentChar())) || p.currentChar() == '_' { 319 ident.WriteByte(p.currentChar()) 320 p.curr++ 321 continue 322 } 323 break 324 } 325 326 // empty ident. 327 if ident.Len() == 0 { 328 return "", errEmptyIdent 329 } 330 return ident.String(), nil 331 } 332 333 func (p *parser) done() bool { 334 return p.curr >= len(p.urlPath) 335 } 336 337 func (p *parser) currentChar() byte { 338 if p.done() { 339 return invalidChar 340 } 341 return p.urlPath[p.curr] 342 } 343 344 // consume consumes the given character. 345 func (p *parser) consume(c byte) error { 346 if p.currentChar() == c { 347 p.curr++ 348 return nil 349 } 350 return fmt.Errorf("failed to consume `%c`", c) 351 } 352 353 // peekN gets the character at position p.curr+n. 354 func (p *parser) peekN(n int) byte { 355 peekIdx := p.curr + n 356 if peekIdx < len(p.urlPath) { 357 return p.urlPath[peekIdx] 358 } 359 return invalidChar 360 } 361 362 // isUnreserved checks whether the given rune is of type unreserved. 
func isUnreserved(r rune) bool {
	// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	switch r {
	case '-', '.', '_', '~':
		return true
	}
	return isAlpha(r) || isDigit(r)
}

// isAlpha checks whether the given rune is an ASCII letter.
func isAlpha(r rune) bool {
	return ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z')
}

// isDigit checks whether the given rune is an ASCII decimal digit.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}

// isSubDelims checks whether the given rune is of type sub-delims:
// "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=".
func isSubDelims(r rune) bool {
	switch r {
	case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
		return true
	default:
		return false
	}
}

// isPCTEncoded checks whether the three given runes form a pct-encoded
// triplet: '%' followed by two hexadecimal digits.
func isPCTEncoded(r1, r2, r3 rune) bool {
	return r1 == '%' && isHexDigit(r2) && isHexDigit(r3)
}

// isHexDigit checks whether the given rune is an ASCII hexadecimal digit,
// in either case.
func isHexDigit(r rune) bool {
	return isDigit(r) || ('A' <= r && r <= 'F') || ('a' <= r && r <= 'f')
}