github.com/c9s/go@v0.0.0-20180120015821-984e81f64e0c/src/net/url/url.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package url parses URLs and implements query escaping. 6 package url 7 8 // See RFC 3986. This package generally follows RFC 3986, except where 9 // it deviates for compatibility reasons. When sending changes, first 10 // search old issues for history on decisions. Unit tests should also 11 // contain references to issue numbers with details. 12 13 import ( 14 "bytes" 15 "errors" 16 "fmt" 17 "sort" 18 "strconv" 19 "strings" 20 ) 21 22 // Error reports an error and the operation and URL that caused it. 23 type Error struct { 24 Op string 25 URL string 26 Err error 27 } 28 29 func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() } 30 31 type timeout interface { 32 Timeout() bool 33 } 34 35 func (e *Error) Timeout() bool { 36 t, ok := e.Err.(timeout) 37 return ok && t.Timeout() 38 } 39 40 type temporary interface { 41 Temporary() bool 42 } 43 44 func (e *Error) Temporary() bool { 45 t, ok := e.Err.(temporary) 46 return ok && t.Temporary() 47 } 48 49 func ishex(c byte) bool { 50 switch { 51 case '0' <= c && c <= '9': 52 return true 53 case 'a' <= c && c <= 'f': 54 return true 55 case 'A' <= c && c <= 'F': 56 return true 57 } 58 return false 59 } 60 61 func unhex(c byte) byte { 62 switch { 63 case '0' <= c && c <= '9': 64 return c - '0' 65 case 'a' <= c && c <= 'f': 66 return c - 'a' + 10 67 case 'A' <= c && c <= 'F': 68 return c - 'A' + 10 69 } 70 return 0 71 } 72 73 type encoding int 74 75 const ( 76 encodePath encoding = 1 + iota 77 encodePathSegment 78 encodeHost 79 encodeZone 80 encodeUserPassword 81 encodeQueryComponent 82 encodeFragment 83 ) 84 85 type EscapeError string 86 87 func (e EscapeError) Error() string { 88 return "invalid URL escape " + strconv.Quote(string(e)) 89 } 90 91 type 
InvalidHostError string 92 93 func (e InvalidHostError) Error() string { 94 return "invalid character " + strconv.Quote(string(e)) + " in host name" 95 } 96 97 // Return true if the specified character should be escaped when 98 // appearing in a URL string, according to RFC 3986. 99 // 100 // Please be informed that for now shouldEscape does not check all 101 // reserved characters correctly. See golang.org/issue/5684. 102 func shouldEscape(c byte, mode encoding) bool { 103 // §2.3 Unreserved characters (alphanum) 104 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { 105 return false 106 } 107 108 if mode == encodeHost || mode == encodeZone { 109 // §3.2.2 Host allows 110 // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" 111 // as part of reg-name. 112 // We add : because we include :port as part of host. 113 // We add [ ] because we include [ipv6]:port as part of host. 114 // We add < > because they're the only characters left that 115 // we could possibly allow, and Parse will reject them if we 116 // escape them (because hosts can't use %-encoding for 117 // ASCII bytes). 118 switch c { 119 case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': 120 return false 121 } 122 } 123 124 switch c { 125 case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) 126 return false 127 128 case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) 129 // Different sections of the URL allow a few of 130 // the reserved characters to appear unescaped. 131 switch mode { 132 case encodePath: // §3.3 133 // The RFC allows : @ & = + $ but saves / ; , for assigning 134 // meaning to individual path segments. This package 135 // only manipulates the path as a whole, so we allow those 136 // last three as well. That leaves only ? to escape. 137 return c == '?' 
138 139 case encodePathSegment: // §3.3 140 // The RFC allows : @ & = + $ but saves / ; , for assigning 141 // meaning to individual path segments. 142 return c == '/' || c == ';' || c == ',' || c == '?' 143 144 case encodeUserPassword: // §3.2.1 145 // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in 146 // userinfo, so we must escape only '@', '/', and '?'. 147 // The parsing of userinfo treats ':' as special so we must escape 148 // that too. 149 return c == '@' || c == '/' || c == '?' || c == ':' 150 151 case encodeQueryComponent: // §3.4 152 // The RFC reserves (so we must escape) everything. 153 return true 154 155 case encodeFragment: // §4.1 156 // The RFC text is silent but the grammar allows 157 // everything, so escape nothing. 158 return false 159 } 160 } 161 162 // Everything else must be escaped. 163 return true 164 } 165 166 // QueryUnescape does the inverse transformation of QueryEscape, 167 // converting each 3-byte encoded substring of the form "%AB" into the 168 // hex-decoded byte 0xAB. It also converts '+' into ' ' (space). 169 // It returns an error if any % is not followed by two hexadecimal 170 // digits. 171 func QueryUnescape(s string) (string, error) { 172 return unescape(s, encodeQueryComponent) 173 } 174 175 // PathUnescape does the inverse transformation of PathEscape, 176 // converting each 3-byte encoded substring of the form "%AB" into the 177 // hex-decoded byte 0xAB. It also converts '+' into ' ' (space). 178 // It returns an error if any % is not followed by two hexadecimal 179 // digits. 180 // 181 // PathUnescape is identical to QueryUnescape except that it does not 182 // unescape '+' to ' ' (space). 183 func PathUnescape(s string) (string, error) { 184 return unescape(s, encodePathSegment) 185 } 186 187 // unescape unescapes a string; the mode specifies 188 // which section of the URL string is being unescaped. 189 func unescape(s string, mode encoding) (string, error) { 190 // Count %, check that they're well-formed. 
191 n := 0 192 hasPlus := false 193 for i := 0; i < len(s); { 194 switch s[i] { 195 case '%': 196 n++ 197 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { 198 s = s[i:] 199 if len(s) > 3 { 200 s = s[:3] 201 } 202 return "", EscapeError(s) 203 } 204 // Per https://tools.ietf.org/html/rfc3986#page-21 205 // in the host component %-encoding can only be used 206 // for non-ASCII bytes. 207 // But https://tools.ietf.org/html/rfc6874#section-2 208 // introduces %25 being allowed to escape a percent sign 209 // in IPv6 scoped-address literals. Yay. 210 if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" { 211 return "", EscapeError(s[i : i+3]) 212 } 213 if mode == encodeZone { 214 // RFC 6874 says basically "anything goes" for zone identifiers 215 // and that even non-ASCII can be redundantly escaped, 216 // but it seems prudent to restrict %-escaped bytes here to those 217 // that are valid host name bytes in their unescaped form. 218 // That is, you can use escaping in the zone identifier but not 219 // to introduce bytes you couldn't just write directly. 220 // But Windows puts spaces here! Yay. 
221 v := unhex(s[i+1])<<4 | unhex(s[i+2]) 222 if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) { 223 return "", EscapeError(s[i : i+3]) 224 } 225 } 226 i += 3 227 case '+': 228 hasPlus = mode == encodeQueryComponent 229 i++ 230 default: 231 if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) { 232 return "", InvalidHostError(s[i : i+1]) 233 } 234 i++ 235 } 236 } 237 238 if n == 0 && !hasPlus { 239 return s, nil 240 } 241 242 t := make([]byte, len(s)-2*n) 243 j := 0 244 for i := 0; i < len(s); { 245 switch s[i] { 246 case '%': 247 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) 248 j++ 249 i += 3 250 case '+': 251 if mode == encodeQueryComponent { 252 t[j] = ' ' 253 } else { 254 t[j] = '+' 255 } 256 j++ 257 i++ 258 default: 259 t[j] = s[i] 260 j++ 261 i++ 262 } 263 } 264 return string(t), nil 265 } 266 267 // QueryEscape escapes the string so it can be safely placed 268 // inside a URL query. 269 func QueryEscape(s string) string { 270 return escape(s, encodeQueryComponent) 271 } 272 273 // PathEscape escapes the string so it can be safely placed 274 // inside a URL path segment. 
275 func PathEscape(s string) string { 276 return escape(s, encodePathSegment) 277 } 278 279 func escape(s string, mode encoding) string { 280 spaceCount, hexCount := 0, 0 281 for i := 0; i < len(s); i++ { 282 c := s[i] 283 if shouldEscape(c, mode) { 284 if c == ' ' && mode == encodeQueryComponent { 285 spaceCount++ 286 } else { 287 hexCount++ 288 } 289 } 290 } 291 292 if spaceCount == 0 && hexCount == 0 { 293 return s 294 } 295 296 t := make([]byte, len(s)+2*hexCount) 297 j := 0 298 for i := 0; i < len(s); i++ { 299 switch c := s[i]; { 300 case c == ' ' && mode == encodeQueryComponent: 301 t[j] = '+' 302 j++ 303 case shouldEscape(c, mode): 304 t[j] = '%' 305 t[j+1] = "0123456789ABCDEF"[c>>4] 306 t[j+2] = "0123456789ABCDEF"[c&15] 307 j += 3 308 default: 309 t[j] = s[i] 310 j++ 311 } 312 } 313 return string(t) 314 } 315 316 // A URL represents a parsed URL (technically, a URI reference). 317 // 318 // The general form represented is: 319 // 320 // [scheme:][//[userinfo@]host][/]path[?query][#fragment] 321 // 322 // URLs that do not start with a slash after the scheme are interpreted as: 323 // 324 // scheme:opaque[?query][#fragment] 325 // 326 // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/. 327 // A consequence is that it is impossible to tell which slashes in the Path were 328 // slashes in the raw URL and which were %2f. This distinction is rarely important, 329 // but when it is, code must not use Path directly. 330 // The Parse function sets both Path and RawPath in the URL it returns, 331 // and URL's String method uses RawPath if it is a valid encoding of Path, 332 // by calling the EscapedPath method. 
type URL struct {
	Scheme     string
	Opaque     string    // encoded opaque data
	User       *Userinfo // username and password information
	Host       string    // host or host:port
	Path       string    // path (relative paths may omit leading slash)
	RawPath    string    // encoded path hint (see EscapedPath method)
	ForceQuery bool      // append a query ('?') even if RawQuery is empty
	RawQuery   string    // encoded query values, without '?'
	Fragment   string    // fragment for references, without '#'
}

// User returns a Userinfo containing the provided username
// and no password set.
func User(username string) *Userinfo {
	return &Userinfo{username: username}
}

// UserPassword returns a Userinfo containing the provided username
// and password.
//
// This functionality should only be used with legacy web sites.
// RFC 2396 warns that interpreting Userinfo this way
// ``is NOT RECOMMENDED, because the passing of authentication
// information in clear text (such as URI) has proven to be a
// security risk in almost every case where it has been used.''
func UserPassword(username, password string) *Userinfo {
	return &Userinfo{username: username, password: password, passwordSet: true}
}

// The Userinfo type is an immutable encapsulation of username and
// password details for a URL. An existing Userinfo value is guaranteed
// to have a username set (potentially empty, as allowed by RFC 2396),
// and optionally a password.
type Userinfo struct {
	username    string
	password    string
	passwordSet bool
}

// Username returns the username.
// It returns the empty string when called on a nil *Userinfo.
func (u *Userinfo) Username() string {
	if u == nil {
		return ""
	}
	return u.username
}

// Password returns the password in case it is set, and whether it is set.
382 func (u *Userinfo) Password() (string, bool) { 383 if u == nil { 384 return "", false 385 } 386 return u.password, u.passwordSet 387 } 388 389 // String returns the encoded userinfo information in the standard form 390 // of "username[:password]". 391 func (u *Userinfo) String() string { 392 if u == nil { 393 return "" 394 } 395 s := escape(u.username, encodeUserPassword) 396 if u.passwordSet { 397 s += ":" + escape(u.password, encodeUserPassword) 398 } 399 return s 400 } 401 402 // Maybe rawurl is of the form scheme:path. 403 // (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) 404 // If so, return scheme, path; else return "", rawurl. 405 func getscheme(rawurl string) (scheme, path string, err error) { 406 for i := 0; i < len(rawurl); i++ { 407 c := rawurl[i] 408 switch { 409 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': 410 // do nothing 411 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': 412 if i == 0 { 413 return "", rawurl, nil 414 } 415 case c == ':': 416 if i == 0 { 417 return "", "", errors.New("missing protocol scheme") 418 } 419 return rawurl[:i], rawurl[i+1:], nil 420 default: 421 // we have encountered an invalid character, 422 // so there is no valid scheme 423 return "", rawurl, nil 424 } 425 } 426 return "", rawurl, nil 427 } 428 429 // Maybe s is of the form t c u. 430 // If so, return t, c u (or t, u if cutc == true). 431 // If not, return s, "". 432 func split(s string, c string, cutc bool) (string, string) { 433 i := strings.Index(s, c) 434 if i < 0 { 435 return s, "" 436 } 437 if cutc { 438 return s[:i], s[i+len(c):] 439 } 440 return s[:i], s[i:] 441 } 442 443 // Parse parses rawurl into a URL structure. 444 // 445 // The rawurl may be relative (a path, without a host) or absolute 446 // (starting with a scheme). Trying to parse a hostname and path 447 // without a scheme is invalid but may not necessarily return an 448 // error, due to parsing ambiguities. 
449 func Parse(rawurl string) (*URL, error) { 450 // Cut off #frag 451 u, frag := split(rawurl, "#", true) 452 url, err := parse(u, false) 453 if err != nil { 454 return nil, &Error{"parse", u, err} 455 } 456 if frag == "" { 457 return url, nil 458 } 459 if url.Fragment, err = unescape(frag, encodeFragment); err != nil { 460 return nil, &Error{"parse", rawurl, err} 461 } 462 return url, nil 463 } 464 465 // ParseRequestURI parses rawurl into a URL structure. It assumes that 466 // rawurl was received in an HTTP request, so the rawurl is interpreted 467 // only as an absolute URI or an absolute path. 468 // The string rawurl is assumed not to have a #fragment suffix. 469 // (Web browsers strip #fragment before sending the URL to a web server.) 470 func ParseRequestURI(rawurl string) (*URL, error) { 471 url, err := parse(rawurl, true) 472 if err != nil { 473 return nil, &Error{"parse", rawurl, err} 474 } 475 return url, nil 476 } 477 478 // parse parses a URL from a string in one of two contexts. If 479 // viaRequest is true, the URL is assumed to have arrived via an HTTP request, 480 // in which case only absolute URLs or path-absolute relative URLs are allowed. 481 // If viaRequest is false, all forms of relative URLs are allowed. 482 func parse(rawurl string, viaRequest bool) (*URL, error) { 483 var rest string 484 var err error 485 486 if rawurl == "" && viaRequest { 487 return nil, errors.New("empty url") 488 } 489 url := new(URL) 490 491 if rawurl == "*" { 492 url.Path = "*" 493 return url, nil 494 } 495 496 // Split off possible leading "http:", "mailto:", etc. 497 // Cannot contain escaped characters. 
498 if url.Scheme, rest, err = getscheme(rawurl); err != nil { 499 return nil, err 500 } 501 url.Scheme = strings.ToLower(url.Scheme) 502 503 if strings.HasSuffix(rest, "?") && strings.Count(rest, "?") == 1 { 504 url.ForceQuery = true 505 rest = rest[:len(rest)-1] 506 } else { 507 rest, url.RawQuery = split(rest, "?", true) 508 } 509 510 if !strings.HasPrefix(rest, "/") { 511 if url.Scheme != "" { 512 // We consider rootless paths per RFC 3986 as opaque. 513 url.Opaque = rest 514 return url, nil 515 } 516 if viaRequest { 517 return nil, errors.New("invalid URI for request") 518 } 519 520 // Avoid confusion with malformed schemes, like cache_object:foo/bar. 521 // See golang.org/issue/16822. 522 // 523 // RFC 3986, §3.3: 524 // In addition, a URI reference (Section 4.1) may be a relative-path reference, 525 // in which case the first path segment cannot contain a colon (":") character. 526 colon := strings.Index(rest, ":") 527 slash := strings.Index(rest, "/") 528 if colon >= 0 && (slash < 0 || colon < slash) { 529 // First path segment has colon. Not allowed in relative URL. 530 return nil, errors.New("first path segment in URL cannot contain colon") 531 } 532 } 533 534 if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") { 535 var authority string 536 authority, rest = split(rest[2:], "/", false) 537 url.User, url.Host, err = parseAuthority(authority) 538 if err != nil { 539 return nil, err 540 } 541 } 542 // Set Path and, optionally, RawPath. 543 // RawPath is a hint of the encoding of Path. We don't want to set it if 544 // the default escaping of Path is equivalent, to help make sure that people 545 // don't rely on it in general. 
546 if err := url.setPath(rest); err != nil { 547 return nil, err 548 } 549 return url, nil 550 } 551 552 func parseAuthority(authority string) (user *Userinfo, host string, err error) { 553 i := strings.LastIndex(authority, "@") 554 if i < 0 { 555 host, err = parseHost(authority) 556 } else { 557 host, err = parseHost(authority[i+1:]) 558 } 559 if err != nil { 560 return nil, "", err 561 } 562 if i < 0 { 563 return nil, host, nil 564 } 565 userinfo := authority[:i] 566 if !validUserinfo(userinfo) { 567 return nil, "", errors.New("net/url: invalid userinfo") 568 } 569 if !strings.Contains(userinfo, ":") { 570 if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil { 571 return nil, "", err 572 } 573 user = User(userinfo) 574 } else { 575 username, password := split(userinfo, ":", true) 576 if username, err = unescape(username, encodeUserPassword); err != nil { 577 return nil, "", err 578 } 579 if password, err = unescape(password, encodeUserPassword); err != nil { 580 return nil, "", err 581 } 582 user = UserPassword(username, password) 583 } 584 return user, host, nil 585 } 586 587 // parseHost parses host as an authority without user 588 // information. That is, as host[:port]. 589 func parseHost(host string) (string, error) { 590 if strings.HasPrefix(host, "[") { 591 // Parse an IP-Literal in RFC 3986 and RFC 6874. 592 // E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80". 593 i := strings.LastIndex(host, "]") 594 if i < 0 { 595 return "", errors.New("missing ']' in host") 596 } 597 colonPort := host[i+1:] 598 if !validOptionalPort(colonPort) { 599 return "", fmt.Errorf("invalid port %q after host", colonPort) 600 } 601 602 // RFC 6874 defines that %25 (%-encoded percent) introduces 603 // the zone identifier, and the zone identifier can use basically 604 // any %-encoding it likes. That's different from the host, which 605 // can only %-encode non-ASCII bytes. 
606 // We do impose some restrictions on the zone, to avoid stupidity 607 // like newlines. 608 zone := strings.Index(host[:i], "%25") 609 if zone >= 0 { 610 host1, err := unescape(host[:zone], encodeHost) 611 if err != nil { 612 return "", err 613 } 614 host2, err := unescape(host[zone:i], encodeZone) 615 if err != nil { 616 return "", err 617 } 618 host3, err := unescape(host[i:], encodeHost) 619 if err != nil { 620 return "", err 621 } 622 return host1 + host2 + host3, nil 623 } 624 } 625 626 var err error 627 if host, err = unescape(host, encodeHost); err != nil { 628 return "", err 629 } 630 return host, nil 631 } 632 633 // setPath sets the Path and RawPath fields of the URL based on the provided 634 // escaped path p. It maintains the invariant that RawPath is only specified 635 // when it differs from the default encoding of the path. 636 // For example: 637 // - setPath("/foo/bar") will set Path="/foo/bar" and RawPath="" 638 // - setPath("/foo%2fbar") will set Path="/foo/bar" and RawPath="/foo%2fbar" 639 // setPath will return an error only if the provided path contains an invalid 640 // escaping. 641 func (u *URL) setPath(p string) error { 642 path, err := unescape(p, encodePath) 643 if err != nil { 644 return err 645 } 646 u.Path = path 647 if escp := escape(path, encodePath); p == escp { 648 // Default encoding is fine. 649 u.RawPath = "" 650 } else { 651 u.RawPath = p 652 } 653 return nil 654 } 655 656 // EscapedPath returns the escaped form of u.Path. 657 // In general there are multiple possible escaped forms of any path. 658 // EscapedPath returns u.RawPath when it is a valid escaping of u.Path. 659 // Otherwise EscapedPath ignores u.RawPath and computes an escaped 660 // form on its own. 661 // The String and RequestURI methods use EscapedPath to construct 662 // their results. 663 // In general, code should call EscapedPath instead of 664 // reading u.RawPath directly. 
665 func (u *URL) EscapedPath() string { 666 if u.RawPath != "" && validEncodedPath(u.RawPath) { 667 p, err := unescape(u.RawPath, encodePath) 668 if err == nil && p == u.Path { 669 return u.RawPath 670 } 671 } 672 if u.Path == "*" { 673 return "*" // don't escape (Issue 11202) 674 } 675 return escape(u.Path, encodePath) 676 } 677 678 // validEncodedPath reports whether s is a valid encoded path. 679 // It must not contain any bytes that require escaping during path encoding. 680 func validEncodedPath(s string) bool { 681 for i := 0; i < len(s); i++ { 682 // RFC 3986, Appendix A. 683 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@". 684 // shouldEscape is not quite compliant with the RFC, 685 // so we check the sub-delims ourselves and let 686 // shouldEscape handle the others. 687 switch s[i] { 688 case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '@': 689 // ok 690 case '[', ']': 691 // ok - not specified in RFC 3986 but left alone by modern browsers 692 case '%': 693 // ok - percent encoded, will decode 694 default: 695 if shouldEscape(s[i], encodePath) { 696 return false 697 } 698 } 699 } 700 return true 701 } 702 703 // validOptionalPort reports whether port is either an empty string 704 // or matches /^:\d*$/ 705 func validOptionalPort(port string) bool { 706 if port == "" { 707 return true 708 } 709 if port[0] != ':' { 710 return false 711 } 712 for _, b := range port[1:] { 713 if b < '0' || b > '9' { 714 return false 715 } 716 } 717 return true 718 } 719 720 // String reassembles the URL into a valid URL string. 721 // The general form of the result is one of: 722 // 723 // scheme:opaque?query#fragment 724 // scheme://userinfo@host/path?query#fragment 725 // 726 // If u.Opaque is non-empty, String uses the first form; 727 // otherwise it uses the second form. 728 // To obtain the path, String uses u.EscapedPath(). 729 // 730 // In the second form, the following rules apply: 731 // - if u.Scheme is empty, scheme: is omitted. 
732 // - if u.User is nil, userinfo@ is omitted. 733 // - if u.Host is empty, host/ is omitted. 734 // - if u.Scheme and u.Host are empty and u.User is nil, 735 // the entire scheme://userinfo@host/ is omitted. 736 // - if u.Host is non-empty and u.Path begins with a /, 737 // the form host/path does not add its own /. 738 // - if u.RawQuery is empty, ?query is omitted. 739 // - if u.Fragment is empty, #fragment is omitted. 740 func (u *URL) String() string { 741 var buf bytes.Buffer 742 if u.Scheme != "" { 743 buf.WriteString(u.Scheme) 744 buf.WriteByte(':') 745 } 746 if u.Opaque != "" { 747 buf.WriteString(u.Opaque) 748 } else { 749 if u.Scheme != "" || u.Host != "" || u.User != nil { 750 if u.Host != "" || u.Path != "" || u.User != nil { 751 buf.WriteString("//") 752 } 753 if ui := u.User; ui != nil { 754 buf.WriteString(ui.String()) 755 buf.WriteByte('@') 756 } 757 if h := u.Host; h != "" { 758 buf.WriteString(escape(h, encodeHost)) 759 } 760 } 761 path := u.EscapedPath() 762 if path != "" && path[0] != '/' && u.Host != "" { 763 buf.WriteByte('/') 764 } 765 if buf.Len() == 0 { 766 // RFC 3986 §4.2 767 // A path segment that contains a colon character (e.g., "this:that") 768 // cannot be used as the first segment of a relative-path reference, as 769 // it would be mistaken for a scheme name. Such a segment must be 770 // preceded by a dot-segment (e.g., "./this:that") to make a relative- 771 // path reference. 772 if i := strings.IndexByte(path, ':'); i > -1 && strings.IndexByte(path[:i], '/') == -1 { 773 buf.WriteString("./") 774 } 775 } 776 buf.WriteString(path) 777 } 778 if u.ForceQuery || u.RawQuery != "" { 779 buf.WriteByte('?') 780 buf.WriteString(u.RawQuery) 781 } 782 if u.Fragment != "" { 783 buf.WriteByte('#') 784 buf.WriteString(escape(u.Fragment, encodeFragment)) 785 } 786 return buf.String() 787 } 788 789 // Values maps a string key to a list of values. 790 // It is typically used for query parameters and form values. 
// Unlike in the http.Header map, the keys in a Values map
// are case-sensitive.
type Values map[string][]string

// Get gets the first value associated with the given key.
// If there are no values associated with the key, Get returns
// the empty string. To access multiple values, use the map
// directly.
func (v Values) Get(key string) string {
	if v == nil {
		return ""
	}
	vs := v[key]
	if len(vs) == 0 {
		return ""
	}
	return vs[0]
}

// Set sets the key to value. It replaces any existing
// values.
func (v Values) Set(key, value string) {
	v[key] = []string{value}
}

// Add adds the value to key. It appends to any existing
// values associated with key.
func (v Values) Add(key, value string) {
	v[key] = append(v[key], value)
}

// Del deletes the values associated with key.
func (v Values) Del(key string) {
	delete(v, key)
}

// ParseQuery parses the URL-encoded query string and returns
// a map listing the values specified for each key.
// ParseQuery always returns a non-nil map containing all the
// valid query parameters found; err describes the first decoding error
// encountered, if any.
//
// Query is expected to be a list of key=value settings separated by
// ampersands or semicolons. A setting without an equals sign is
// interpreted as a key set to an empty value.
836 func ParseQuery(query string) (Values, error) { 837 m := make(Values) 838 err := parseQuery(m, query) 839 return m, err 840 } 841 842 func parseQuery(m Values, query string) (err error) { 843 for query != "" { 844 key := query 845 if i := strings.IndexAny(key, "&;"); i >= 0 { 846 key, query = key[:i], key[i+1:] 847 } else { 848 query = "" 849 } 850 if key == "" { 851 continue 852 } 853 value := "" 854 if i := strings.Index(key, "="); i >= 0 { 855 key, value = key[:i], key[i+1:] 856 } 857 key, err1 := QueryUnescape(key) 858 if err1 != nil { 859 if err == nil { 860 err = err1 861 } 862 continue 863 } 864 value, err1 = QueryUnescape(value) 865 if err1 != nil { 866 if err == nil { 867 err = err1 868 } 869 continue 870 } 871 m[key] = append(m[key], value) 872 } 873 return err 874 } 875 876 // Encode encodes the values into ``URL encoded'' form 877 // ("bar=baz&foo=quux") sorted by key. 878 func (v Values) Encode() string { 879 if v == nil { 880 return "" 881 } 882 var buf bytes.Buffer 883 keys := make([]string, 0, len(v)) 884 for k := range v { 885 keys = append(keys, k) 886 } 887 sort.Strings(keys) 888 for _, k := range keys { 889 vs := v[k] 890 prefix := QueryEscape(k) + "=" 891 for _, v := range vs { 892 if buf.Len() > 0 { 893 buf.WriteByte('&') 894 } 895 buf.WriteString(prefix) 896 buf.WriteString(QueryEscape(v)) 897 } 898 } 899 return buf.String() 900 } 901 902 // resolvePath applies special path segments from refs and applies 903 // them to base, per RFC 3986. 
904 func resolvePath(base, ref string) string { 905 var full string 906 if ref == "" { 907 full = base 908 } else if ref[0] != '/' { 909 i := strings.LastIndex(base, "/") 910 full = base[:i+1] + ref 911 } else { 912 full = ref 913 } 914 if full == "" { 915 return "" 916 } 917 var dst []string 918 src := strings.Split(full, "/") 919 for _, elem := range src { 920 switch elem { 921 case ".": 922 // drop 923 case "..": 924 if len(dst) > 0 { 925 dst = dst[:len(dst)-1] 926 } 927 default: 928 dst = append(dst, elem) 929 } 930 } 931 if last := src[len(src)-1]; last == "." || last == ".." { 932 // Add final slash to the joined path. 933 dst = append(dst, "") 934 } 935 return "/" + strings.TrimPrefix(strings.Join(dst, "/"), "/") 936 } 937 938 // IsAbs reports whether the URL is absolute. 939 // Absolute means that it has a non-empty scheme. 940 func (u *URL) IsAbs() bool { 941 return u.Scheme != "" 942 } 943 944 // Parse parses a URL in the context of the receiver. The provided URL 945 // may be relative or absolute. Parse returns nil, err on parse 946 // failure, otherwise its return value is the same as ResolveReference. 947 func (u *URL) Parse(ref string) (*URL, error) { 948 refurl, err := Parse(ref) 949 if err != nil { 950 return nil, err 951 } 952 return u.ResolveReference(refurl), nil 953 } 954 955 // ResolveReference resolves a URI reference to an absolute URI from 956 // an absolute base URI, per RFC 3986 Section 5.2. The URI reference 957 // may be relative or absolute. ResolveReference always returns a new 958 // URL instance, even if the returned URL is identical to either the 959 // base or reference. If ref is an absolute URL, then ResolveReference 960 // ignores base and returns a copy of ref. 961 func (u *URL) ResolveReference(ref *URL) *URL { 962 url := *ref 963 if ref.Scheme == "" { 964 url.Scheme = u.Scheme 965 } 966 if ref.Scheme != "" || ref.Host != "" || ref.User != nil { 967 // The "absoluteURI" or "net_path" cases. 
968 // We can ignore the error from setPath since we know we provided a 969 // validly-escaped path. 970 url.setPath(resolvePath(ref.EscapedPath(), "")) 971 return &url 972 } 973 if ref.Opaque != "" { 974 url.User = nil 975 url.Host = "" 976 url.Path = "" 977 return &url 978 } 979 if ref.Path == "" && ref.RawQuery == "" { 980 url.RawQuery = u.RawQuery 981 if ref.Fragment == "" { 982 url.Fragment = u.Fragment 983 } 984 } 985 // The "abs_path" or "rel_path" cases. 986 url.Host = u.Host 987 url.User = u.User 988 url.setPath(resolvePath(u.EscapedPath(), ref.EscapedPath())) 989 return &url 990 } 991 992 // Query parses RawQuery and returns the corresponding values. 993 // It silently discards malformed value pairs. 994 // To check errors use ParseQuery. 995 func (u *URL) Query() Values { 996 v, _ := ParseQuery(u.RawQuery) 997 return v 998 } 999 1000 // RequestURI returns the encoded path?query or opaque?query 1001 // string that would be used in an HTTP request for u. 1002 func (u *URL) RequestURI() string { 1003 result := u.Opaque 1004 if result == "" { 1005 result = u.EscapedPath() 1006 if result == "" { 1007 result = "/" 1008 } 1009 } else { 1010 if strings.HasPrefix(result, "//") { 1011 result = u.Scheme + ":" + result 1012 } 1013 } 1014 if u.ForceQuery || u.RawQuery != "" { 1015 result += "?" + u.RawQuery 1016 } 1017 return result 1018 } 1019 1020 // Hostname returns u.Host, without any port number. 1021 // 1022 // If Host is an IPv6 literal with a port number, Hostname returns the 1023 // IPv6 literal without the square brackets. IPv6 literals may include 1024 // a zone identifier. 1025 func (u *URL) Hostname() string { 1026 return stripPort(u.Host) 1027 } 1028 1029 // Port returns the port part of u.Host, without the leading colon. 1030 // If u.Host doesn't contain a port, Port returns an empty string. 
1031 func (u *URL) Port() string { 1032 return portOnly(u.Host) 1033 } 1034 1035 func stripPort(hostport string) string { 1036 colon := strings.IndexByte(hostport, ':') 1037 if colon == -1 { 1038 return hostport 1039 } 1040 if i := strings.IndexByte(hostport, ']'); i != -1 { 1041 return strings.TrimPrefix(hostport[:i], "[") 1042 } 1043 return hostport[:colon] 1044 } 1045 1046 func portOnly(hostport string) string { 1047 colon := strings.IndexByte(hostport, ':') 1048 if colon == -1 { 1049 return "" 1050 } 1051 if i := strings.Index(hostport, "]:"); i != -1 { 1052 return hostport[i+len("]:"):] 1053 } 1054 if strings.Contains(hostport, "]") { 1055 return "" 1056 } 1057 return hostport[colon+len(":"):] 1058 } 1059 1060 // Marshaling interface implementations. 1061 // Would like to implement MarshalText/UnmarshalText but that will change the JSON representation of URLs. 1062 1063 func (u *URL) MarshalBinary() (text []byte, err error) { 1064 return []byte(u.String()), nil 1065 } 1066 1067 func (u *URL) UnmarshalBinary(text []byte) error { 1068 u1, err := Parse(string(text)) 1069 if err != nil { 1070 return err 1071 } 1072 *u = *u1 1073 return nil 1074 } 1075 1076 // validUserinfo reports whether s is a valid userinfo string per RFC 3986 1077 // Section 3.2.1: 1078 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 1079 // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 1080 // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 1081 // / "*" / "+" / "," / ";" / "=" 1082 // 1083 // It doesn't validate pct-encoded. The caller does that via func unescape. 
func validUserinfo(s string) bool {
	for _, r := range s {
		switch {
		case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z', '0' <= r && r <= '9':
			// ALPHA / DIGIT
			continue
		}
		switch r {
		case '-', '.', '_', ':', '~', '!', '$', '&', '\'',
			'(', ')', '*', '+', ',', ';', '=', '%', '@':
			// Remaining unreserved marks and sub-delims, ':' as the
			// user/password separator, '%' introducing an escape, and '@'.
			continue
		default:
			return false
		}
	}
	return true
}