github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/net/url/url.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package url parses URLs and implements query escaping. 6 // See RFC 3986. 7 package url 8 9 import ( 10 "bytes" 11 "errors" 12 "sort" 13 "strconv" 14 "strings" 15 ) 16 17 // Error reports an error and the operation and URL that caused it. 18 type Error struct { 19 Op string 20 URL string 21 Err error 22 } 23 24 func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() } 25 26 func ishex(c byte) bool { 27 switch { 28 case '0' <= c && c <= '9': 29 return true 30 case 'a' <= c && c <= 'f': 31 return true 32 case 'A' <= c && c <= 'F': 33 return true 34 } 35 return false 36 } 37 38 func unhex(c byte) byte { 39 switch { 40 case '0' <= c && c <= '9': 41 return c - '0' 42 case 'a' <= c && c <= 'f': 43 return c - 'a' + 10 44 case 'A' <= c && c <= 'F': 45 return c - 'A' + 10 46 } 47 return 0 48 } 49 50 type encoding int 51 52 const ( 53 encodePath encoding = 1 + iota 54 encodeHost 55 encodeUserPassword 56 encodeQueryComponent 57 encodeFragment 58 ) 59 60 type EscapeError string 61 62 func (e EscapeError) Error() string { 63 return "invalid URL escape " + strconv.Quote(string(e)) 64 } 65 66 // Return true if the specified character should be escaped when 67 // appearing in a URL string, according to RFC 3986. 68 // 69 // Please be informed that for now shouldEscape does not check all 70 // reserved characters correctly. See golang.org/issue/5684. 
71 func shouldEscape(c byte, mode encoding) bool { 72 // §2.3 Unreserved characters (alphanum) 73 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { 74 return false 75 } 76 77 switch c { 78 case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) 79 return false 80 81 case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) 82 // Different sections of the URL allow a few of 83 // the reserved characters to appear unescaped. 84 switch mode { 85 case encodePath: // §3.3 86 // The RFC allows : @ & = + $ but saves / ; , for assigning 87 // meaning to individual path segments. This package 88 // only manipulates the path as a whole, so we allow those 89 // last two as well. That leaves only ? to escape. 90 return c == '?' 91 92 case encodeUserPassword: // §3.2.1 93 // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in 94 // userinfo, so we must escape only '@', '/', and '?'. 95 // The parsing of userinfo treats ':' as special so we must escape 96 // that too. 97 return c == '@' || c == '/' || c == '?' || c == ':' 98 99 case encodeHost: // §3.2.1 100 // The RFC allows ':'. 101 return c != ':' 102 103 case encodeQueryComponent: // §3.4 104 // The RFC reserves (so we must escape) everything. 105 return true 106 107 case encodeFragment: // §4.1 108 // The RFC text is silent but the grammar allows 109 // everything, so escape nothing. 110 return false 111 } 112 113 case '[', ']': // §2.2 Reserved characters (reserved) 114 switch mode { 115 case encodeHost: // §3.2.1 116 // The RFC allows '[', ']'. 117 return false 118 } 119 } 120 121 // Everything else must be escaped. 122 return true 123 } 124 125 // QueryUnescape does the inverse transformation of QueryEscape, converting 126 // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if 127 // any % is not followed by two hexadecimal digits. 
128 func QueryUnescape(s string) (string, error) { 129 return unescape(s, encodeQueryComponent) 130 } 131 132 // unescape unescapes a string; the mode specifies 133 // which section of the URL string is being unescaped. 134 func unescape(s string, mode encoding) (string, error) { 135 // Count %, check that they're well-formed. 136 n := 0 137 hasPlus := false 138 for i := 0; i < len(s); { 139 switch s[i] { 140 case '%': 141 n++ 142 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { 143 s = s[i:] 144 if len(s) > 3 { 145 s = s[0:3] 146 } 147 return "", EscapeError(s) 148 } 149 i += 3 150 case '+': 151 hasPlus = mode == encodeQueryComponent 152 i++ 153 default: 154 i++ 155 } 156 } 157 158 if n == 0 && !hasPlus { 159 return s, nil 160 } 161 162 t := make([]byte, len(s)-2*n) 163 j := 0 164 for i := 0; i < len(s); { 165 switch s[i] { 166 case '%': 167 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) 168 j++ 169 i += 3 170 case '+': 171 if mode == encodeQueryComponent { 172 t[j] = ' ' 173 } else { 174 t[j] = '+' 175 } 176 j++ 177 i++ 178 default: 179 t[j] = s[i] 180 j++ 181 i++ 182 } 183 } 184 return string(t), nil 185 } 186 187 // QueryEscape escapes the string so it can be safely placed 188 // inside a URL query. 
189 func QueryEscape(s string) string { 190 return escape(s, encodeQueryComponent) 191 } 192 193 func escape(s string, mode encoding) string { 194 spaceCount, hexCount := 0, 0 195 for i := 0; i < len(s); i++ { 196 c := s[i] 197 if shouldEscape(c, mode) { 198 if c == ' ' && mode == encodeQueryComponent { 199 spaceCount++ 200 } else { 201 hexCount++ 202 } 203 } 204 } 205 206 if spaceCount == 0 && hexCount == 0 { 207 return s 208 } 209 210 t := make([]byte, len(s)+2*hexCount) 211 j := 0 212 for i := 0; i < len(s); i++ { 213 switch c := s[i]; { 214 case c == ' ' && mode == encodeQueryComponent: 215 t[j] = '+' 216 j++ 217 case shouldEscape(c, mode): 218 t[j] = '%' 219 t[j+1] = "0123456789ABCDEF"[c>>4] 220 t[j+2] = "0123456789ABCDEF"[c&15] 221 j += 3 222 default: 223 t[j] = s[i] 224 j++ 225 } 226 } 227 return string(t) 228 } 229 230 // A URL represents a parsed URL (technically, a URI reference). 231 // The general form represented is: 232 // 233 // scheme://[userinfo@]host/path[?query][#fragment] 234 // 235 // URLs that do not start with a slash after the scheme are interpreted as: 236 // 237 // scheme:opaque[?query][#fragment] 238 // 239 // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/. 240 // A consequence is that it is impossible to tell which slashes in the Path were 241 // slashes in the raw URL and which were %2f. This distinction is rarely important, 242 // but when it is a client must use other routines to parse the raw URL or construct 243 // the parsed URL. For example, an HTTP server can consult req.RequestURI, and 244 // an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"} 245 // instead of URL{Host: "example.com", Path: "/Go/"}. 246 type URL struct { 247 Scheme string 248 Opaque string // encoded opaque data 249 User *Userinfo // username and password information 250 Host string // host or host:port 251 Path string 252 RawQuery string // encoded query values, without '?' 
253 Fragment string // fragment for references, without '#' 254 } 255 256 // User returns a Userinfo containing the provided username 257 // and no password set. 258 func User(username string) *Userinfo { 259 return &Userinfo{username, "", false} 260 } 261 262 // UserPassword returns a Userinfo containing the provided username 263 // and password. 264 // This functionality should only be used with legacy web sites. 265 // RFC 2396 warns that interpreting Userinfo this way 266 // ``is NOT RECOMMENDED, because the passing of authentication 267 // information in clear text (such as URI) has proven to be a 268 // security risk in almost every case where it has been used.'' 269 func UserPassword(username, password string) *Userinfo { 270 return &Userinfo{username, password, true} 271 } 272 273 // The Userinfo type is an immutable encapsulation of username and 274 // password details for a URL. An existing Userinfo value is guaranteed 275 // to have a username set (potentially empty, as allowed by RFC 2396), 276 // and optionally a password. 277 type Userinfo struct { 278 username string 279 password string 280 passwordSet bool 281 } 282 283 // Username returns the username. 284 func (u *Userinfo) Username() string { 285 return u.username 286 } 287 288 // Password returns the password in case it is set, and whether it is set. 289 func (u *Userinfo) Password() (string, bool) { 290 if u.passwordSet { 291 return u.password, true 292 } 293 return "", false 294 } 295 296 // String returns the encoded userinfo information in the standard form 297 // of "username[:password]". 298 func (u *Userinfo) String() string { 299 s := escape(u.username, encodeUserPassword) 300 if u.passwordSet { 301 s += ":" + escape(u.password, encodeUserPassword) 302 } 303 return s 304 } 305 306 // Maybe rawurl is of the form scheme:path. 307 // (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) 308 // If so, return scheme, path; else return "", rawurl. 
309 func getscheme(rawurl string) (scheme, path string, err error) { 310 for i := 0; i < len(rawurl); i++ { 311 c := rawurl[i] 312 switch { 313 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': 314 // do nothing 315 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': 316 if i == 0 { 317 return "", rawurl, nil 318 } 319 case c == ':': 320 if i == 0 { 321 return "", "", errors.New("missing protocol scheme") 322 } 323 return rawurl[0:i], rawurl[i+1:], nil 324 default: 325 // we have encountered an invalid character, 326 // so there is no valid scheme 327 return "", rawurl, nil 328 } 329 } 330 return "", rawurl, nil 331 } 332 333 // Maybe s is of the form t c u. 334 // If so, return t, c u (or t, u if cutc == true). 335 // If not, return s, "". 336 func split(s string, c string, cutc bool) (string, string) { 337 i := strings.Index(s, c) 338 if i < 0 { 339 return s, "" 340 } 341 if cutc { 342 return s[0:i], s[i+len(c):] 343 } 344 return s[0:i], s[i:] 345 } 346 347 // Parse parses rawurl into a URL structure. 348 // The rawurl may be relative or absolute. 349 func Parse(rawurl string) (url *URL, err error) { 350 // Cut off #frag 351 u, frag := split(rawurl, "#", true) 352 if url, err = parse(u, false); err != nil { 353 return nil, err 354 } 355 if frag == "" { 356 return url, nil 357 } 358 if url.Fragment, err = unescape(frag, encodeFragment); err != nil { 359 return nil, &Error{"parse", rawurl, err} 360 } 361 return url, nil 362 } 363 364 // ParseRequestURI parses rawurl into a URL structure. It assumes that 365 // rawurl was received in an HTTP request, so the rawurl is interpreted 366 // only as an absolute URI or an absolute path. 367 // The string rawurl is assumed not to have a #fragment suffix. 368 // (Web browsers strip #fragment before sending the URL to a web server.) 369 func ParseRequestURI(rawurl string) (url *URL, err error) { 370 return parse(rawurl, true) 371 } 372 373 // parse parses a URL from a string in one of two contexts. 
If 374 // viaRequest is true, the URL is assumed to have arrived via an HTTP request, 375 // in which case only absolute URLs or path-absolute relative URLs are allowed. 376 // If viaRequest is false, all forms of relative URLs are allowed. 377 func parse(rawurl string, viaRequest bool) (url *URL, err error) { 378 var rest string 379 380 if rawurl == "" && viaRequest { 381 err = errors.New("empty url") 382 goto Error 383 } 384 url = new(URL) 385 386 if rawurl == "*" { 387 url.Path = "*" 388 return 389 } 390 391 // Split off possible leading "http:", "mailto:", etc. 392 // Cannot contain escaped characters. 393 if url.Scheme, rest, err = getscheme(rawurl); err != nil { 394 goto Error 395 } 396 url.Scheme = strings.ToLower(url.Scheme) 397 398 rest, url.RawQuery = split(rest, "?", true) 399 400 if !strings.HasPrefix(rest, "/") { 401 if url.Scheme != "" { 402 // We consider rootless paths per RFC 3986 as opaque. 403 url.Opaque = rest 404 return url, nil 405 } 406 if viaRequest { 407 err = errors.New("invalid URI for request") 408 goto Error 409 } 410 } 411 412 if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") { 413 var authority string 414 authority, rest = split(rest[2:], "/", false) 415 url.User, url.Host, err = parseAuthority(authority) 416 if err != nil { 417 goto Error 418 } 419 } 420 if url.Path, err = unescape(rest, encodePath); err != nil { 421 goto Error 422 } 423 return url, nil 424 425 Error: 426 return nil, &Error{"parse", rawurl, err} 427 } 428 429 func parseAuthority(authority string) (user *Userinfo, host string, err error) { 430 i := strings.LastIndex(authority, "@") 431 if i < 0 { 432 host, err = parseHost(authority) 433 } else { 434 host, err = parseHost(authority[i+1:]) 435 } 436 if err != nil { 437 return nil, "", err 438 } 439 if i < 0 { 440 return nil, host, nil 441 } 442 userinfo := authority[:i] 443 if strings.Index(userinfo, ":") < 0 { 444 if userinfo, err = unescape(userinfo, 
encodeUserPassword); err != nil { 445 return nil, "", err 446 } 447 user = User(userinfo) 448 } else { 449 username, password := split(userinfo, ":", true) 450 if username, err = unescape(username, encodeUserPassword); err != nil { 451 return nil, "", err 452 } 453 if password, err = unescape(password, encodeUserPassword); err != nil { 454 return nil, "", err 455 } 456 user = UserPassword(username, password) 457 } 458 return user, host, nil 459 } 460 461 // parseHost parses host as an authority without user information. 462 func parseHost(host string) (string, error) { 463 litOrName := host 464 if strings.HasPrefix(host, "[") { 465 // Parse an IP-Literal in RFC 3986 and RFC 6874. 466 // E.g., "[fe80::1], "[fe80::1%25en0]" 467 // 468 // RFC 4007 defines "%" as a delimiter character in 469 // the textual representation of IPv6 addresses. 470 // Per RFC 6874, in URIs that "%" is encoded as "%25". 471 i := strings.LastIndex(host[1:], "]") 472 if i < 0 { 473 return "", errors.New("missing ']' in host") 474 } 475 // Parse a host subcomponent without a ZoneID in RFC 476 // 6874 because the ZoneID is allowed to use the 477 // percent encoded form. 478 j := strings.Index(host[1:1+i], "%25") 479 if j < 0 { 480 litOrName = host[1 : 1+i] 481 } else { 482 litOrName = host[1 : 1+j] 483 } 484 } 485 // A URI containing an IP-Literal without a ZoneID or 486 // IPv4address in RFC 3986 and RFC 6847 must not be 487 // percent-encoded. 488 // 489 // A URI containing a DNS registered name in RFC 3986 is 490 // allowed to be percent-encoded, though we don't use it for 491 // now to avoid messing up with the gap between allowed 492 // characters in URI and allowed characters in DNS. 493 // See golang.org/issue/7991. 
494 if strings.Contains(litOrName, "%") { 495 return "", errors.New("percent-encoded characters in host") 496 } 497 var err error 498 if host, err = unescape(host, encodeHost); err != nil { 499 return "", err 500 } 501 return host, nil 502 } 503 504 // String reassembles the URL into a valid URL string. 505 // The general form of the result is one of: 506 // 507 // scheme:opaque?query#fragment 508 // scheme://userinfo@host/path?query#fragment 509 // 510 // If u.Opaque is non-empty, String uses the first form; 511 // otherwise it uses the second form. 512 // 513 // In the second form, the following rules apply: 514 // - if u.Scheme is empty, scheme: is omitted. 515 // - if u.User is nil, userinfo@ is omitted. 516 // - if u.Host is empty, host/ is omitted. 517 // - if u.Scheme and u.Host are empty and u.User is nil, 518 // the entire scheme://userinfo@host/ is omitted. 519 // - if u.Host is non-empty and u.Path begins with a /, 520 // the form host/path does not add its own /. 521 // - if u.RawQuery is empty, ?query is omitted. 522 // - if u.Fragment is empty, #fragment is omitted. 523 func (u *URL) String() string { 524 var buf bytes.Buffer 525 if u.Scheme != "" { 526 buf.WriteString(u.Scheme) 527 buf.WriteByte(':') 528 } 529 if u.Opaque != "" { 530 buf.WriteString(u.Opaque) 531 } else { 532 if u.Scheme != "" || u.Host != "" || u.User != nil { 533 buf.WriteString("//") 534 if ui := u.User; ui != nil { 535 buf.WriteString(ui.String()) 536 buf.WriteByte('@') 537 } 538 if h := u.Host; h != "" { 539 buf.WriteString(escape(h, encodeHost)) 540 } 541 } 542 if u.Path != "" && u.Path[0] != '/' && u.Host != "" { 543 buf.WriteByte('/') 544 } 545 buf.WriteString(escape(u.Path, encodePath)) 546 } 547 if u.RawQuery != "" { 548 buf.WriteByte('?') 549 buf.WriteString(u.RawQuery) 550 } 551 if u.Fragment != "" { 552 buf.WriteByte('#') 553 buf.WriteString(escape(u.Fragment, encodeFragment)) 554 } 555 return buf.String() 556 } 557 558 // Values maps a string key to a list of values. 
// It is typically used for query parameters and form values.
// Unlike in the http.Header map, the keys in a Values map
// are case-sensitive.
type Values map[string][]string

// Get gets the first value associated with the given key.
// If there are no values associated with the key, Get returns
// the empty string. To access multiple values, use the map
// directly.
func (v Values) Get(key string) string {
	// Indexing a nil map yields a nil slice, so a nil Values needs no
	// special case here.
	if vs := v[key]; len(vs) > 0 {
		return vs[0]
	}
	return ""
}

// Set sets the key to value. It replaces any existing
// values.
func (v Values) Set(key, value string) {
	single := []string{value}
	v[key] = single
}

// Add adds the value to key. It appends to any existing
// values associated with key.
func (v Values) Add(key, value string) {
	existing := v[key]
	v[key] = append(existing, value)
}

// Del deletes the values associated with key.
func (v Values) Del(key string) {
	delete(v, key)
}

// ParseQuery parses the URL-encoded query string and returns
// a map listing the values specified for each key.
// ParseQuery always returns a non-nil map containing all the
// valid query parameters found; err describes the first decoding error
// encountered, if any.
601 func ParseQuery(query string) (m Values, err error) { 602 m = make(Values) 603 err = parseQuery(m, query) 604 return 605 } 606 607 func parseQuery(m Values, query string) (err error) { 608 for query != "" { 609 key := query 610 if i := strings.IndexAny(key, "&;"); i >= 0 { 611 key, query = key[:i], key[i+1:] 612 } else { 613 query = "" 614 } 615 if key == "" { 616 continue 617 } 618 value := "" 619 if i := strings.Index(key, "="); i >= 0 { 620 key, value = key[:i], key[i+1:] 621 } 622 key, err1 := QueryUnescape(key) 623 if err1 != nil { 624 if err == nil { 625 err = err1 626 } 627 continue 628 } 629 value, err1 = QueryUnescape(value) 630 if err1 != nil { 631 if err == nil { 632 err = err1 633 } 634 continue 635 } 636 m[key] = append(m[key], value) 637 } 638 return err 639 } 640 641 // Encode encodes the values into ``URL encoded'' form 642 // ("bar=baz&foo=quux") sorted by key. 643 func (v Values) Encode() string { 644 if v == nil { 645 return "" 646 } 647 var buf bytes.Buffer 648 keys := make([]string, 0, len(v)) 649 for k := range v { 650 keys = append(keys, k) 651 } 652 sort.Strings(keys) 653 for _, k := range keys { 654 vs := v[k] 655 prefix := QueryEscape(k) + "=" 656 for _, v := range vs { 657 if buf.Len() > 0 { 658 buf.WriteByte('&') 659 } 660 buf.WriteString(prefix) 661 buf.WriteString(QueryEscape(v)) 662 } 663 } 664 return buf.String() 665 } 666 667 // resolvePath applies special path segments from refs and applies 668 // them to base, per RFC 3986. 
669 func resolvePath(base, ref string) string { 670 var full string 671 if ref == "" { 672 full = base 673 } else if ref[0] != '/' { 674 i := strings.LastIndex(base, "/") 675 full = base[:i+1] + ref 676 } else { 677 full = ref 678 } 679 if full == "" { 680 return "" 681 } 682 var dst []string 683 src := strings.Split(full, "/") 684 for _, elem := range src { 685 switch elem { 686 case ".": 687 // drop 688 case "..": 689 if len(dst) > 0 { 690 dst = dst[:len(dst)-1] 691 } 692 default: 693 dst = append(dst, elem) 694 } 695 } 696 if last := src[len(src)-1]; last == "." || last == ".." { 697 // Add final slash to the joined path. 698 dst = append(dst, "") 699 } 700 return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/") 701 } 702 703 // IsAbs reports whether the URL is absolute. 704 func (u *URL) IsAbs() bool { 705 return u.Scheme != "" 706 } 707 708 // Parse parses a URL in the context of the receiver. The provided URL 709 // may be relative or absolute. Parse returns nil, err on parse 710 // failure, otherwise its return value is the same as ResolveReference. 711 func (u *URL) Parse(ref string) (*URL, error) { 712 refurl, err := Parse(ref) 713 if err != nil { 714 return nil, err 715 } 716 return u.ResolveReference(refurl), nil 717 } 718 719 // ResolveReference resolves a URI reference to an absolute URI from 720 // an absolute base URI, per RFC 3986 Section 5.2. The URI reference 721 // may be relative or absolute. ResolveReference always returns a new 722 // URL instance, even if the returned URL is identical to either the 723 // base or reference. If ref is an absolute URL, then ResolveReference 724 // ignores base and returns a copy of ref. 725 func (u *URL) ResolveReference(ref *URL) *URL { 726 url := *ref 727 if ref.Scheme == "" { 728 url.Scheme = u.Scheme 729 } 730 if ref.Scheme != "" || ref.Host != "" || ref.User != nil { 731 // The "absoluteURI" or "net_path" cases. 
732 url.Path = resolvePath(ref.Path, "") 733 return &url 734 } 735 if ref.Opaque != "" { 736 url.User = nil 737 url.Host = "" 738 url.Path = "" 739 return &url 740 } 741 if ref.Path == "" { 742 if ref.RawQuery == "" { 743 url.RawQuery = u.RawQuery 744 if ref.Fragment == "" { 745 url.Fragment = u.Fragment 746 } 747 } 748 } 749 // The "abs_path" or "rel_path" cases. 750 url.Host = u.Host 751 url.User = u.User 752 url.Path = resolvePath(u.Path, ref.Path) 753 return &url 754 } 755 756 // Query parses RawQuery and returns the corresponding values. 757 func (u *URL) Query() Values { 758 v, _ := ParseQuery(u.RawQuery) 759 return v 760 } 761 762 // RequestURI returns the encoded path?query or opaque?query 763 // string that would be used in an HTTP request for u. 764 func (u *URL) RequestURI() string { 765 result := u.Opaque 766 if result == "" { 767 result = escape(u.Path, encodePath) 768 if result == "" { 769 result = "/" 770 } 771 } else { 772 if strings.HasPrefix(result, "//") { 773 result = u.Scheme + ":" + result 774 } 775 } 776 if u.RawQuery != "" { 777 result += "?" + u.RawQuery 778 } 779 return result 780 }