github.com/cloudwego/hertz@v0.9.3/pkg/protocol/uri.go (about) 1 /* 2 * Copyright 2022 CloudWeGo Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * 16 * The MIT License (MIT) 17 * 18 * Copyright (c) 2015-present Aliaksandr Valialkin, VertaMedia, Kirill Danshin, Erik Dubbelboer, FastHTTP Authors 19 * 20 * Permission is hereby granted, free of charge, to any person obtaining a copy 21 * of this software and associated documentation files (the "Software"), to deal 22 * in the Software without restriction, including without limitation the rights 23 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 24 * copies of the Software, and to permit persons to whom the Software is 25 * furnished to do so, subject to the following conditions: 26 * 27 * The above copyright notice and this permission notice shall be included in 28 * all copies or substantial portions of the Software. 29 * 30 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 31 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 32 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 33 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 34 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 35 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 36 * THE SOFTWARE. 37 * 38 * This file may have been modified by CloudWeGo authors. All CloudWeGo 39 * Modifications are Copyright 2022 CloudWeGo Authors. 40 */ 41 42 package protocol 43 44 import ( 45 "bytes" 46 "path/filepath" 47 "sync" 48 49 "github.com/cloudwego/hertz/internal/bytesconv" 50 "github.com/cloudwego/hertz/internal/bytestr" 51 "github.com/cloudwego/hertz/internal/nocopy" 52 ) 53 54 // AcquireURI returns an empty URI instance from the pool. 55 // 56 // Release the URI with ReleaseURI after the URI is no longer needed. 57 // This allows reducing GC load. 58 func AcquireURI() *URI { 59 return uriPool.Get().(*URI) 60 } 61 62 // ReleaseURI releases the URI acquired via AcquireURI. 63 // 64 // The released URI mustn't be used after releasing it, otherwise data races 65 // may occur. 66 func ReleaseURI(u *URI) { 67 u.Reset() 68 uriPool.Put(u) 69 } 70 71 var uriPool = &sync.Pool{ 72 New: func() interface{} { 73 return &URI{} 74 }, 75 } 76 77 type URI struct { 78 noCopy nocopy.NoCopy //lint:ignore U1000 until noCopy is used 79 80 pathOriginal []byte 81 scheme []byte 82 path []byte 83 queryString []byte 84 hash []byte 85 host []byte 86 87 queryArgs Args 88 parsedQueryArgs bool 89 90 DisablePathNormalizing bool 91 92 fullURI []byte 93 requestURI []byte 94 95 username []byte 96 password []byte 97 } 98 99 type argsKV struct { 100 key []byte 101 value []byte 102 noValue bool 103 } 104 105 func (kv *argsKV) GetKey() []byte { 106 return kv.key 107 } 108 109 func (kv *argsKV) GetValue() []byte { 110 return kv.value 111 } 112 113 // CopyTo copies uri contents to dst. 114 func (u *URI) CopyTo(dst *URI) { 115 dst.Reset() 116 dst.pathOriginal = append(dst.pathOriginal[:0], u.pathOriginal...) 117 dst.scheme = append(dst.scheme[:0], u.scheme...) 118 dst.path = append(dst.path[:0], u.path...) 119 dst.queryString = append(dst.queryString[:0], u.queryString...) 120 dst.hash = append(dst.hash[:0], u.hash...) 121 dst.host = append(dst.host[:0], u.host...) 122 dst.username = append(dst.username[:0], u.username...) 123 dst.password = append(dst.password[:0], u.password...) 124 125 u.queryArgs.CopyTo(&dst.queryArgs) 126 dst.parsedQueryArgs = u.parsedQueryArgs 127 dst.DisablePathNormalizing = u.DisablePathNormalizing 128 129 // fullURI and requestURI shouldn't be copied, since they are created 130 // from scratch on each FullURI() and RequestURI() call. 131 } 132 133 // QueryArgs returns query args. 134 func (u *URI) QueryArgs() *Args { 135 u.parseQueryArgs() 136 return &u.queryArgs 137 } 138 139 func (u *URI) parseQueryArgs() { 140 if u.parsedQueryArgs { 141 return 142 } 143 u.queryArgs.ParseBytes(u.queryString) 144 u.parsedQueryArgs = true 145 } 146 147 // Hash returns URI hash, i.e. qwe of http://aaa.com/foo/bar?baz=123#qwe . 148 // 149 // The returned value is valid until the next URI method call. 150 func (u *URI) Hash() []byte { 151 return u.hash 152 } 153 154 // SetHash sets URI hash. 155 func (u *URI) SetHash(hash string) { 156 u.hash = append(u.hash[:0], hash...) 157 } 158 159 // SetHashBytes sets URI hash. 160 func (u *URI) SetHashBytes(hash []byte) { 161 u.hash = append(u.hash[:0], hash...) 162 } 163 164 // Username returns URI username 165 func (u *URI) Username() []byte { 166 return u.username 167 } 168 169 // SetUsername sets URI username. 170 func (u *URI) SetUsername(username string) { 171 u.username = append(u.username[:0], username...) 172 } 173 174 // SetUsernameBytes sets URI username. 175 func (u *URI) SetUsernameBytes(username []byte) { 176 u.username = append(u.username[:0], username...) 177 } 178 179 // Password returns URI password 180 func (u *URI) Password() []byte { 181 return u.password 182 } 183 184 // SetPassword sets URI password. 185 func (u *URI) SetPassword(password string) { 186 u.password = append(u.password[:0], password...) 187 } 188 189 // SetPasswordBytes sets URI password. 190 func (u *URI) SetPasswordBytes(password []byte) { 191 u.password = append(u.password[:0], password...) 192 } 193 194 // QueryString returns URI query string, 195 // i.e. baz=123 of http://aaa.com/foo/bar?baz=123#qwe . 196 // 197 // The returned value is valid until the next URI method call. 198 func (u *URI) QueryString() []byte { 199 return u.queryString 200 } 201 202 // SetQueryString sets URI query string. 203 func (u *URI) SetQueryString(queryString string) { 204 u.queryString = append(u.queryString[:0], queryString...) 205 u.parsedQueryArgs = false 206 } 207 208 // SetQueryStringBytes sets URI query string. 209 func (u *URI) SetQueryStringBytes(queryString []byte) { 210 u.queryString = append(u.queryString[:0], queryString...) 211 u.parsedQueryArgs = false 212 } 213 214 // Path returns URI path, i.e. /foo/bar of http://aaa.com/foo/bar?baz=123#qwe . 215 // 216 // The returned path is always urldecoded and normalized, 217 // i.e. '//f%20obar/baz/../zzz' becomes '/f obar/zzz'. 218 // 219 // The returned value is valid until the next URI method call. 220 func (u *URI) Path() []byte { 221 path := u.path 222 if len(path) == 0 { 223 path = bytestr.StrSlash 224 } 225 return path 226 } 227 228 // SetPath sets URI path. 229 func (u *URI) SetPath(path string) { 230 u.pathOriginal = append(u.pathOriginal[:0], path...) 231 u.path = normalizePath(u.path, u.pathOriginal) 232 } 233 234 // String returns full uri. 235 func (u *URI) String() string { 236 return string(u.FullURI()) 237 } 238 239 // SetPathBytes sets URI path. 240 func (u *URI) SetPathBytes(path []byte) { 241 u.pathOriginal = append(u.pathOriginal[:0], path...) 242 u.path = normalizePath(u.path, u.pathOriginal) 243 } 244 245 // PathOriginal returns the original path from requestURI passed to URI.Parse(). 246 // 247 // The returned value is valid until the next URI method call. 248 func (u *URI) PathOriginal() []byte { 249 return u.pathOriginal 250 } 251 252 // Scheme returns URI scheme, i.e. http of http://aaa.com/foo/bar?baz=123#qwe . 253 // 254 // Returned scheme is always lowercased. 255 // 256 // The returned value is valid until the next URI method call. 257 func (u *URI) Scheme() []byte { 258 scheme := u.scheme 259 if len(scheme) == 0 { 260 scheme = bytestr.StrHTTP 261 } 262 return scheme 263 } 264 265 // SetScheme sets URI scheme, i.e. http, https, ftp, etc. 266 func (u *URI) SetScheme(scheme string) { 267 u.scheme = append(u.scheme[:0], scheme...) 268 bytesconv.LowercaseBytes(u.scheme) 269 } 270 271 // SetSchemeBytes sets URI scheme, i.e. http, https, ftp, etc. 272 func (u *URI) SetSchemeBytes(scheme []byte) { 273 u.scheme = append(u.scheme[:0], scheme...) 274 bytesconv.LowercaseBytes(u.scheme) 275 } 276 277 // Reset clears uri. 278 func (u *URI) Reset() { 279 u.pathOriginal = u.pathOriginal[:0] 280 u.scheme = u.scheme[:0] 281 u.path = u.path[:0] 282 u.queryString = u.queryString[:0] 283 u.hash = u.hash[:0] 284 u.username = u.username[:0] 285 u.password = u.password[:0] 286 287 u.host = u.host[:0] 288 u.queryArgs.Reset() 289 u.parsedQueryArgs = false 290 u.DisablePathNormalizing = false 291 292 // There is no need in u.fullURI = u.fullURI[:0], since full uri 293 // is calculated on each call to FullURI(). 294 295 // There is no need in u.requestURI = u.requestURI[:0], since requestURI 296 // is calculated on each call to RequestURI(). 297 } 298 299 // Host returns host part, i.e. aaa.com of http://aaa.com/foo/bar?baz=123#qwe . 300 // 301 // Host is always lowercased. 302 func (u *URI) Host() []byte { 303 return u.host 304 } 305 306 // SetHost sets host for the uri. 307 func (u *URI) SetHost(host string) { 308 u.host = append(u.host[:0], host...) 309 bytesconv.LowercaseBytes(u.host) 310 } 311 312 // SetHostBytes sets host for the uri. 313 func (u *URI) SetHostBytes(host []byte) { 314 u.host = append(u.host[:0], host...) 315 bytesconv.LowercaseBytes(u.host) 316 } 317 318 // LastPathSegment returns the last part of uri path after '/'. 319 // 320 // Examples: 321 // 322 // - For /foo/bar/baz.html path returns baz.html. 323 // - For /foo/bar/ returns empty byte slice. 324 // - For /foobar.js returns foobar.js. 325 func (u *URI) LastPathSegment() []byte { 326 path := u.Path() 327 n := bytes.LastIndexByte(path, '/') 328 if n < 0 { 329 return path 330 } 331 return path[n+1:] 332 } 333 334 // Update updates uri. 335 // 336 // The following newURI types are accepted: 337 // 338 // - Absolute, i.e. http://foobar.com/aaa/bb?cc . In this case the original 339 // uri is replaced by newURI. 340 // - Absolute without scheme, i.e. //foobar.com/aaa/bb?cc. In this case 341 // the original scheme is preserved. 342 // - Missing host, i.e. /aaa/bb?cc . In this case only RequestURI part 343 // of the original uri is replaced. 344 // - Relative path, i.e. xx?yy=abc . In this case the original RequestURI 345 // is updated according to the new relative path. 346 func (u *URI) Update(newURI string) { 347 u.UpdateBytes(bytesconv.S2b(newURI)) 348 } 349 350 // UpdateBytes updates uri. 351 // 352 // The following newURI types are accepted: 353 // 354 // - Absolute, i.e. http://foobar.com/aaa/bb?cc . In this case the original 355 // uri is replaced by newURI. 356 // - Absolute without scheme, i.e. //foobar.com/aaa/bb?cc. In this case 357 // the original scheme is preserved. 358 // - Missing host, i.e. /aaa/bb?cc . In this case only RequestURI part 359 // of the original uri is replaced. 360 // - Relative path, i.e. xx?yy=abc . In this case the original RequestURI 361 // is updated according to the new relative path. 362 func (u *URI) UpdateBytes(newURI []byte) { 363 u.requestURI = u.updateBytes(newURI, u.requestURI) 364 } 365 366 // Parse initializes URI from the given host and uri. 367 // 368 // host may be nil. In this case uri must contain fully qualified uri, 369 // i.e. with scheme and host. http is assumed if scheme is omitted. 370 // 371 // uri may contain e.g. RequestURI without scheme and host if host is non-empty. 372 func (u *URI) Parse(host, uri []byte) { 373 u.parse(host, uri, false) 374 } 375 376 // Maybe rawURL is of the form scheme:path. 377 // (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) 378 // If so, return scheme, path; else return nil, rawURL. 379 func getScheme(rawURL []byte) (scheme, path []byte) { 380 for i := 0; i < len(rawURL); i++ { 381 c := rawURL[i] 382 switch { 383 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': 384 // do nothing 385 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': 386 if i == 0 { 387 return nil, rawURL 388 } 389 case c == ':': 390 return checkSchemeWhenCharIsColon(i, rawURL) 391 default: 392 // we have encountered an invalid character, 393 // so there is no valid scheme 394 return nil, rawURL 395 } 396 } 397 return nil, rawURL 398 } 399 400 func (u *URI) parse(host, uri []byte, isTLS bool) { 401 u.Reset() 402 403 if stringContainsCTLByte(uri) { 404 return 405 } 406 407 if len(host) == 0 || bytes.Contains(uri, bytestr.StrColonSlashSlash) { 408 scheme, newHost, newURI := splitHostURI(host, uri) 409 u.scheme = append(u.scheme, scheme...) 410 bytesconv.LowercaseBytes(u.scheme) 411 host = newHost 412 uri = newURI 413 } 414 415 if isTLS { 416 u.scheme = append(u.scheme[:0], bytestr.StrHTTPS...) 417 } 418 419 if n := bytes.Index(host, bytestr.StrAt); n >= 0 { 420 auth := host[:n] 421 host = host[n+1:] 422 423 if n := bytes.Index(auth, bytestr.StrColon); n >= 0 { 424 u.username = append(u.username[:0], auth[:n]...) 425 u.password = append(u.password[:0], auth[n+1:]...) 426 } else { 427 u.username = append(u.username[:0], auth...) 428 u.password = u.password[:0] 429 } 430 } 431 432 u.host = append(u.host, host...) 433 bytesconv.LowercaseBytes(u.host) 434 435 b := uri 436 queryIndex := bytes.IndexByte(b, '?') 437 fragmentIndex := bytes.IndexByte(b, '#') 438 // Ignore query in fragment part 439 if fragmentIndex >= 0 && queryIndex > fragmentIndex { 440 queryIndex = -1 441 } 442 443 if queryIndex < 0 && fragmentIndex < 0 { 444 u.pathOriginal = append(u.pathOriginal, b...) 445 u.path = normalizePath(u.path, u.pathOriginal) 446 return 447 } 448 449 if queryIndex >= 0 { 450 // Path is everything up to the start of the query 451 u.pathOriginal = append(u.pathOriginal, b[:queryIndex]...) 452 u.path = normalizePath(u.path, u.pathOriginal) 453 454 if fragmentIndex < 0 { 455 u.queryString = append(u.queryString, b[queryIndex+1:]...) 456 } else { 457 u.queryString = append(u.queryString, b[queryIndex+1:fragmentIndex]...) 458 u.hash = append(u.hash, b[fragmentIndex+1:]...) 459 } 460 return 461 } 462 463 // fragmentIndex >= 0 && queryIndex < 0 464 // Path is up to the start of fragment 465 u.pathOriginal = append(u.pathOriginal, b[:fragmentIndex]...) 466 u.path = normalizePath(u.path, u.pathOriginal) 467 u.hash = append(u.hash, b[fragmentIndex+1:]...) 468 } 469 470 // stringContainsCTLByte reports whether s contains any ASCII control character. 471 func stringContainsCTLByte(s []byte) bool { 472 for i := 0; i < len(s); i++ { 473 b := s[i] 474 if b < ' ' || b == 0x7f { 475 return true 476 } 477 } 478 return false 479 } 480 481 func splitHostURI(host, uri []byte) ([]byte, []byte, []byte) { 482 scheme, path := getScheme(uri) 483 484 if scheme == nil { 485 return bytestr.StrHTTP, host, uri 486 } 487 488 uri = path[len(bytestr.StrSlashSlash):] 489 n := bytes.IndexByte(uri, '/') 490 if n < 0 { 491 // A hack for bogus urls like foobar.com?a=b without 492 // slash after host. 493 if n = bytes.IndexByte(uri, '?'); n >= 0 { 494 return scheme, uri[:n], uri[n:] 495 } 496 return scheme, uri, bytestr.StrSlash 497 } 498 return scheme, uri[:n], uri[n:] 499 } 500 501 func normalizePath(dst, src []byte) []byte { 502 dst = dst[:0] 503 dst = addLeadingSlash(dst, src) 504 dst = decodeArgAppendNoPlus(dst, src) 505 506 // Windows server need to replace all backslashes with 507 // forward slashes to avoid path traversal attacks. 508 if filepath.Separator == '\\' { 509 for { 510 n := bytes.IndexByte(dst, '\\') 511 if n < 0 { 512 break 513 } 514 dst[n] = '/' 515 } 516 } 517 518 // remove duplicate slashes 519 b := dst 520 bSize := len(b) 521 for { 522 n := bytes.Index(b, bytestr.StrSlashSlash) 523 if n < 0 { 524 break 525 } 526 b = b[n:] 527 copy(b, b[1:]) 528 b = b[:len(b)-1] 529 bSize-- 530 } 531 dst = dst[:bSize] 532 533 // remove /./ parts 534 b = dst 535 for { 536 n := bytes.Index(b, bytestr.StrSlashDotSlash) 537 if n < 0 { 538 break 539 } 540 nn := n + len(bytestr.StrSlashDotSlash) - 1 541 copy(b[n:], b[nn:]) 542 b = b[:len(b)-nn+n] 543 } 544 545 // remove /foo/../ parts 546 for { 547 n := bytes.Index(b, bytestr.StrSlashDotDotSlash) 548 if n < 0 { 549 break 550 } 551 nn := bytes.LastIndexByte(b[:n], '/') 552 if nn < 0 { 553 nn = 0 554 } 555 n += len(bytestr.StrSlashDotDotSlash) - 1 556 copy(b[nn:], b[n:]) 557 b = b[:len(b)-n+nn] 558 } 559 560 // remove trailing /foo/.. 561 n := bytes.LastIndex(b, bytestr.StrSlashDotDot) 562 if n >= 0 && n+len(bytestr.StrSlashDotDot) == len(b) { 563 nn := bytes.LastIndexByte(b[:n], '/') 564 if nn < 0 { 565 return bytestr.StrSlash 566 } 567 b = b[:nn+1] 568 } 569 570 return b 571 } 572 573 func copyArgs(dst, src []argsKV) []argsKV { 574 if cap(dst) < len(src) { 575 tmp := make([]argsKV, len(src)) 576 copy(tmp, dst) 577 dst = tmp 578 } 579 n := len(src) 580 dst = dst[:n] 581 for i := 0; i < n; i++ { 582 dstKV := &dst[i] 583 srcKV := &src[i] 584 dstKV.key = append(dstKV.key[:0], srcKV.key...) 585 if srcKV.noValue { 586 dstKV.value = dstKV.value[:0] 587 } else { 588 dstKV.value = append(dstKV.value[:0], srcKV.value...) 589 } 590 dstKV.noValue = srcKV.noValue 591 } 592 return dst 593 } 594 595 func (u *URI) updateBytes(newURI, buf []byte) []byte { 596 if len(newURI) == 0 { 597 return buf 598 } 599 600 n := bytes.Index(newURI, bytestr.StrSlashSlash) 601 if n >= 0 { 602 // absolute uri 603 var b [32]byte 604 schemeOriginal := b[:0] 605 if len(u.scheme) > 0 { 606 schemeOriginal = append([]byte(nil), u.scheme...) 607 } 608 if n == 0 { 609 newURI = bytes.Join([][]byte{u.scheme, bytestr.StrColon, newURI}, nil) 610 } 611 u.Parse(nil, newURI) 612 if len(schemeOriginal) > 0 && len(u.scheme) == 0 { 613 u.scheme = append(u.scheme[:0], schemeOriginal...) 614 } 615 return buf 616 } 617 618 if newURI[0] == '/' { 619 // uri without host 620 buf = u.appendSchemeHost(buf[:0]) 621 buf = append(buf, newURI...) 622 u.Parse(nil, buf) 623 return buf 624 } 625 626 // relative path 627 switch newURI[0] { 628 case '?': 629 // query string only update 630 u.SetQueryStringBytes(newURI[1:]) 631 return append(buf[:0], u.FullURI()...) 632 case '#': 633 // update only hash 634 u.SetHashBytes(newURI[1:]) 635 return append(buf[:0], u.FullURI()...) 636 default: 637 // update the last path part after the slash 638 path := u.Path() 639 n = bytes.LastIndexByte(path, '/') 640 if n < 0 { 641 panic("BUG: path must contain at least one slash") 642 } 643 buf = u.appendSchemeHost(buf[:0]) 644 buf = bytesconv.AppendQuotedPath(buf, path[:n+1]) 645 buf = append(buf, newURI...) 646 u.Parse(nil, buf) 647 return buf 648 } 649 } 650 651 // AppendBytes appends full uri to dst and returns the extended dst. 652 func (u *URI) AppendBytes(dst []byte) []byte { 653 dst = u.appendSchemeHost(dst) 654 dst = append(dst, u.RequestURI()...) 655 if len(u.hash) > 0 { 656 dst = append(dst, '#') 657 dst = append(dst, u.hash...) 658 } 659 return dst 660 } 661 662 // RequestURI returns RequestURI - i.e. URI without Scheme and Host. 663 func (u *URI) RequestURI() []byte { 664 var dst []byte 665 if u.DisablePathNormalizing { 666 dst = append(u.requestURI[:0], u.PathOriginal()...) 667 } else { 668 dst = bytesconv.AppendQuotedPath(u.requestURI[:0], u.Path()) 669 } 670 if u.queryArgs.Len() > 0 { 671 dst = append(dst, '?') 672 dst = u.queryArgs.AppendBytes(dst) 673 } else if len(u.queryString) > 0 { 674 dst = append(dst, '?') 675 dst = append(dst, u.queryString...) 676 } 677 u.requestURI = dst 678 return u.requestURI 679 } 680 681 func (u *URI) appendSchemeHost(dst []byte) []byte { 682 dst = append(dst, u.Scheme()...) 683 dst = append(dst, bytestr.StrColonSlashSlash...) 684 return append(dst, u.Host()...) 685 } 686 687 // FullURI returns full uri in the form {Scheme}://{Host}{RequestURI}#{Hash}. 688 func (u *URI) FullURI() []byte { 689 u.fullURI = u.AppendBytes(u.fullURI[:0]) 690 return u.fullURI 691 } 692 693 func ParseURI(uriStr string) *URI { 694 uri := &URI{} 695 uri.Parse(nil, []byte(uriStr)) 696 697 return uri 698 } 699 700 type Proxy func(*Request) (*URI, error) 701 702 func ProxyURI(fixedURI *URI) Proxy { 703 return func(*Request) (*URI, error) { 704 return fixedURI, nil 705 } 706 }