github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/net/url/url.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package url parses URLs and implements query escaping. 6 // See RFC 3986. 7 package url 8 9 import ( 10 "bytes" 11 "errors" 12 "sort" 13 "strconv" 14 "strings" 15 ) 16 17 // Error reports an error and the operation and URL that caused it. 18 type Error struct { 19 Op string 20 URL string 21 Err error 22 } 23 24 func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() } 25 26 func ishex(c byte) bool { 27 switch { 28 case '0' <= c && c <= '9': 29 return true 30 case 'a' <= c && c <= 'f': 31 return true 32 case 'A' <= c && c <= 'F': 33 return true 34 } 35 return false 36 } 37 38 func unhex(c byte) byte { 39 switch { 40 case '0' <= c && c <= '9': 41 return c - '0' 42 case 'a' <= c && c <= 'f': 43 return c - 'a' + 10 44 case 'A' <= c && c <= 'F': 45 return c - 'A' + 10 46 } 47 return 0 48 } 49 50 type encoding int 51 52 const ( 53 encodePath encoding = 1 + iota 54 encodeUserPassword 55 encodeQueryComponent 56 encodeFragment 57 ) 58 59 type EscapeError string 60 61 func (e EscapeError) Error() string { 62 return "invalid URL escape " + strconv.Quote(string(e)) 63 } 64 65 // Return true if the specified character should be escaped when 66 // appearing in a URL string, according to RFC 3986. 67 // When 'all' is true the full range of reserved characters are matched. 68 func shouldEscape(c byte, mode encoding) bool { 69 // §2.3 Unreserved characters (alphanum) 70 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { 71 return false 72 } 73 74 switch c { 75 case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) 76 return false 77 78 case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) 79 // Different sections of the URL allow a few of 80 // the reserved characters to appear unescaped. 81 switch mode { 82 case encodePath: // §3.3 83 // The RFC allows : @ & = + $ but saves / ; , for assigning 84 // meaning to individual path segments. This package 85 // only manipulates the path as a whole, so we allow those 86 // last two as well. That leaves only ? to escape. 87 return c == '?' 88 89 case encodeUserPassword: // §3.2.2 90 // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /. 91 // The parsing of userinfo treats : as special so we must escape that too. 92 return c == '@' || c == '/' || c == ':' 93 94 case encodeQueryComponent: // §3.4 95 // The RFC reserves (so we must escape) everything. 96 return true 97 98 case encodeFragment: // §4.1 99 // The RFC text is silent but the grammar allows 100 // everything, so escape nothing. 101 return false 102 } 103 } 104 105 // Everything else must be escaped. 106 return true 107 } 108 109 // QueryUnescape does the inverse transformation of QueryEscape, converting 110 // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if 111 // any % is not followed by two hexadecimal digits. 112 func QueryUnescape(s string) (string, error) { 113 return unescape(s, encodeQueryComponent) 114 } 115 116 // unescape unescapes a string; the mode specifies 117 // which section of the URL string is being unescaped. 118 func unescape(s string, mode encoding) (string, error) { 119 // Count %, check that they're well-formed. 120 n := 0 121 hasPlus := false 122 for i := 0; i < len(s); { 123 switch s[i] { 124 case '%': 125 n++ 126 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { 127 s = s[i:] 128 if len(s) > 3 { 129 s = s[0:3] 130 } 131 return "", EscapeError(s) 132 } 133 i += 3 134 case '+': 135 hasPlus = mode == encodeQueryComponent 136 i++ 137 default: 138 i++ 139 } 140 } 141 142 if n == 0 && !hasPlus { 143 return s, nil 144 } 145 146 t := make([]byte, len(s)-2*n) 147 j := 0 148 for i := 0; i < len(s); { 149 switch s[i] { 150 case '%': 151 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) 152 j++ 153 i += 3 154 case '+': 155 if mode == encodeQueryComponent { 156 t[j] = ' ' 157 } else { 158 t[j] = '+' 159 } 160 j++ 161 i++ 162 default: 163 t[j] = s[i] 164 j++ 165 i++ 166 } 167 } 168 return string(t), nil 169 } 170 171 // QueryEscape escapes the string so it can be safely placed 172 // inside a URL query. 173 func QueryEscape(s string) string { 174 return escape(s, encodeQueryComponent) 175 } 176 177 func escape(s string, mode encoding) string { 178 spaceCount, hexCount := 0, 0 179 for i := 0; i < len(s); i++ { 180 c := s[i] 181 if shouldEscape(c, mode) { 182 if c == ' ' && mode == encodeQueryComponent { 183 spaceCount++ 184 } else { 185 hexCount++ 186 } 187 } 188 } 189 190 if spaceCount == 0 && hexCount == 0 { 191 return s 192 } 193 194 t := make([]byte, len(s)+2*hexCount) 195 j := 0 196 for i := 0; i < len(s); i++ { 197 switch c := s[i]; { 198 case c == ' ' && mode == encodeQueryComponent: 199 t[j] = '+' 200 j++ 201 case shouldEscape(c, mode): 202 t[j] = '%' 203 t[j+1] = "0123456789ABCDEF"[c>>4] 204 t[j+2] = "0123456789ABCDEF"[c&15] 205 j += 3 206 default: 207 t[j] = s[i] 208 j++ 209 } 210 } 211 return string(t) 212 } 213 214 // A URL represents a parsed URL (technically, a URI reference). 215 // The general form represented is: 216 // 217 // scheme://[userinfo@]host/path[?query][#fragment] 218 // 219 // URLs that do not start with a slash after the scheme are interpreted as: 220 // 221 // scheme:opaque[?query][#fragment] 222 // 223 // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/. 224 // A consequence is that it is impossible to tell which slashes in the Path were 225 // slashes in the raw URL and which were %2f. This distinction is rarely important, 226 // but when it is a client must use other routines to parse the raw URL or construct 227 // the parsed URL. For example, an HTTP server can consult req.RequestURI, and 228 // an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"} 229 // instead of URL{Host: "example.com", Path: "/Go/"}. 230 type URL struct { 231 Scheme string 232 Opaque string // encoded opaque data 233 User *Userinfo // username and password information 234 Host string // host or host:port 235 Path string 236 RawQuery string // encoded query values, without '?' 237 Fragment string // fragment for references, without '#' 238 } 239 240 // User returns a Userinfo containing the provided username 241 // and no password set. 242 func User(username string) *Userinfo { 243 return &Userinfo{username, "", false} 244 } 245 246 // UserPassword returns a Userinfo containing the provided username 247 // and password. 248 // This functionality should only be used with legacy web sites. 249 // RFC 2396 warns that interpreting Userinfo this way 250 // ``is NOT RECOMMENDED, because the passing of authentication 251 // information in clear text (such as URI) has proven to be a 252 // security risk in almost every case where it has been used.'' 253 func UserPassword(username, password string) *Userinfo { 254 return &Userinfo{username, password, true} 255 } 256 257 // The Userinfo type is an immutable encapsulation of username and 258 // password details for a URL. An existing Userinfo value is guaranteed 259 // to have a username set (potentially empty, as allowed by RFC 2396), 260 // and optionally a password. 261 type Userinfo struct { 262 username string 263 password string 264 passwordSet bool 265 } 266 267 // Username returns the username. 268 func (u *Userinfo) Username() string { 269 return u.username 270 } 271 272 // Password returns the password in case it is set, and whether it is set. 273 func (u *Userinfo) Password() (string, bool) { 274 if u.passwordSet { 275 return u.password, true 276 } 277 return "", false 278 } 279 280 // String returns the encoded userinfo information in the standard form 281 // of "username[:password]". 282 func (u *Userinfo) String() string { 283 s := escape(u.username, encodeUserPassword) 284 if u.passwordSet { 285 s += ":" + escape(u.password, encodeUserPassword) 286 } 287 return s 288 } 289 290 // Maybe rawurl is of the form scheme:path. 291 // (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) 292 // If so, return scheme, path; else return "", rawurl. 293 func getscheme(rawurl string) (scheme, path string, err error) { 294 for i := 0; i < len(rawurl); i++ { 295 c := rawurl[i] 296 switch { 297 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': 298 // do nothing 299 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': 300 if i == 0 { 301 return "", rawurl, nil 302 } 303 case c == ':': 304 if i == 0 { 305 return "", "", errors.New("missing protocol scheme") 306 } 307 return rawurl[0:i], rawurl[i+1:], nil 308 default: 309 // we have encountered an invalid character, 310 // so there is no valid scheme 311 return "", rawurl, nil 312 } 313 } 314 return "", rawurl, nil 315 } 316 317 // Maybe s is of the form t c u. 318 // If so, return t, c u (or t, u if cutc == true). 319 // If not, return s, "". 320 func split(s string, c string, cutc bool) (string, string) { 321 i := strings.Index(s, c) 322 if i < 0 { 323 return s, "" 324 } 325 if cutc { 326 return s[0:i], s[i+len(c):] 327 } 328 return s[0:i], s[i:] 329 } 330 331 // Parse parses rawurl into a URL structure. 332 // The rawurl may be relative or absolute. 333 func Parse(rawurl string) (url *URL, err error) { 334 // Cut off #frag 335 u, frag := split(rawurl, "#", true) 336 if url, err = parse(u, false); err != nil { 337 return nil, err 338 } 339 if frag == "" { 340 return url, nil 341 } 342 if url.Fragment, err = unescape(frag, encodeFragment); err != nil { 343 return nil, &Error{"parse", rawurl, err} 344 } 345 return url, nil 346 } 347 348 // ParseRequestURI parses rawurl into a URL structure. It assumes that 349 // rawurl was received in an HTTP request, so the rawurl is interpreted 350 // only as an absolute URI or an absolute path. 351 // The string rawurl is assumed not to have a #fragment suffix. 352 // (Web browsers strip #fragment before sending the URL to a web server.) 353 func ParseRequestURI(rawurl string) (url *URL, err error) { 354 return parse(rawurl, true) 355 } 356 357 // parse parses a URL from a string in one of two contexts. If 358 // viaRequest is true, the URL is assumed to have arrived via an HTTP request, 359 // in which case only absolute URLs or path-absolute relative URLs are allowed. 360 // If viaRequest is false, all forms of relative URLs are allowed. 361 func parse(rawurl string, viaRequest bool) (url *URL, err error) { 362 var rest string 363 364 if rawurl == "" && viaRequest { 365 err = errors.New("empty url") 366 goto Error 367 } 368 url = new(URL) 369 370 if rawurl == "*" { 371 url.Path = "*" 372 return 373 } 374 375 // Split off possible leading "http:", "mailto:", etc. 376 // Cannot contain escaped characters. 377 if url.Scheme, rest, err = getscheme(rawurl); err != nil { 378 goto Error 379 } 380 url.Scheme = strings.ToLower(url.Scheme) 381 382 rest, url.RawQuery = split(rest, "?", true) 383 384 if !strings.HasPrefix(rest, "/") { 385 if url.Scheme != "" { 386 // We consider rootless paths per RFC 3986 as opaque. 387 url.Opaque = rest 388 return url, nil 389 } 390 if viaRequest { 391 err = errors.New("invalid URI for request") 392 goto Error 393 } 394 } 395 396 if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") { 397 var authority string 398 authority, rest = split(rest[2:], "/", false) 399 url.User, url.Host, err = parseAuthority(authority) 400 if err != nil { 401 goto Error 402 } 403 if strings.Contains(url.Host, "%") { 404 err = errors.New("hexadecimal escape in host") 405 goto Error 406 } 407 } 408 if url.Path, err = unescape(rest, encodePath); err != nil { 409 goto Error 410 } 411 return url, nil 412 413 Error: 414 return nil, &Error{"parse", rawurl, err} 415 } 416 417 func parseAuthority(authority string) (user *Userinfo, host string, err error) { 418 i := strings.LastIndex(authority, "@") 419 if i < 0 { 420 host = authority 421 return 422 } 423 userinfo, host := authority[:i], authority[i+1:] 424 if strings.Index(userinfo, ":") < 0 { 425 if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil { 426 return 427 } 428 user = User(userinfo) 429 } else { 430 username, password := split(userinfo, ":", true) 431 if username, err = unescape(username, encodeUserPassword); err != nil { 432 return 433 } 434 if password, err = unescape(password, encodeUserPassword); err != nil { 435 return 436 } 437 user = UserPassword(username, password) 438 } 439 return 440 } 441 442 // String reassembles the URL into a valid URL string. 443 func (u *URL) String() string { 444 var buf bytes.Buffer 445 if u.Scheme != "" { 446 buf.WriteString(u.Scheme) 447 buf.WriteByte(':') 448 } 449 if u.Opaque != "" { 450 buf.WriteString(u.Opaque) 451 } else { 452 if u.Scheme != "" || u.Host != "" || u.User != nil { 453 buf.WriteString("//") 454 if u := u.User; u != nil { 455 buf.WriteString(u.String()) 456 buf.WriteByte('@') 457 } 458 if h := u.Host; h != "" { 459 buf.WriteString(h) 460 } 461 } 462 buf.WriteString(escape(u.Path, encodePath)) 463 } 464 if u.RawQuery != "" { 465 buf.WriteByte('?') 466 buf.WriteString(u.RawQuery) 467 } 468 if u.Fragment != "" { 469 buf.WriteByte('#') 470 buf.WriteString(escape(u.Fragment, encodeFragment)) 471 } 472 return buf.String() 473 } 474 475 // Values maps a string key to a list of values. 476 // It is typically used for query parameters and form values. 477 // Unlike in the http.Header map, the keys in a Values map 478 // are case-sensitive. 479 type Values map[string][]string 480 481 // Get gets the first value associated with the given key. 482 // If there are no values associated with the key, Get returns 483 // the empty string. To access multiple values, use the map 484 // directly. 485 func (v Values) Get(key string) string { 486 if v == nil { 487 return "" 488 } 489 vs, ok := v[key] 490 if !ok || len(vs) == 0 { 491 return "" 492 } 493 return vs[0] 494 } 495 496 // Set sets the key to value. It replaces any existing 497 // values. 498 func (v Values) Set(key, value string) { 499 v[key] = []string{value} 500 } 501 502 // Add adds the key to value. It appends to any existing 503 // values associated with key. 504 func (v Values) Add(key, value string) { 505 v[key] = append(v[key], value) 506 } 507 508 // Del deletes the values associated with key. 509 func (v Values) Del(key string) { 510 delete(v, key) 511 } 512 513 // ParseQuery parses the URL-encoded query string and returns 514 // a map listing the values specified for each key. 515 // ParseQuery always returns a non-nil map containing all the 516 // valid query parameters found; err describes the first decoding error 517 // encountered, if any. 518 func ParseQuery(query string) (m Values, err error) { 519 m = make(Values) 520 err = parseQuery(m, query) 521 return 522 } 523 524 func parseQuery(m Values, query string) (err error) { 525 for query != "" { 526 key := query 527 if i := strings.IndexAny(key, "&;"); i >= 0 { 528 key, query = key[:i], key[i+1:] 529 } else { 530 query = "" 531 } 532 if key == "" { 533 continue 534 } 535 value := "" 536 if i := strings.Index(key, "="); i >= 0 { 537 key, value = key[:i], key[i+1:] 538 } 539 key, err1 := QueryUnescape(key) 540 if err1 != nil { 541 if err == nil { 542 err = err1 543 } 544 continue 545 } 546 value, err1 = QueryUnescape(value) 547 if err1 != nil { 548 if err == nil { 549 err = err1 550 } 551 continue 552 } 553 m[key] = append(m[key], value) 554 } 555 return err 556 } 557 558 // Encode encodes the values into ``URL encoded'' form. 559 // e.g. "foo=bar&bar=baz" 560 func (v Values) Encode() string { 561 if v == nil { 562 return "" 563 } 564 var buf bytes.Buffer 565 keys := make([]string, 0, len(v)) 566 for k := range v { 567 keys = append(keys, k) 568 } 569 sort.Strings(keys) 570 for _, k := range keys { 571 vs := v[k] 572 prefix := QueryEscape(k) + "=" 573 for _, v := range vs { 574 if buf.Len() > 0 { 575 buf.WriteByte('&') 576 } 577 buf.WriteString(prefix) 578 buf.WriteString(QueryEscape(v)) 579 } 580 } 581 return buf.String() 582 } 583 584 // resolvePath applies special path segments from refs and applies 585 // them to base, per RFC 3986. 586 func resolvePath(base, ref string) string { 587 var full string 588 if ref == "" { 589 full = base 590 } else if ref[0] != '/' { 591 i := strings.LastIndex(base, "/") 592 full = base[:i+1] + ref 593 } else { 594 full = ref 595 } 596 if full == "" { 597 return "" 598 } 599 var dst []string 600 src := strings.Split(full, "/") 601 for _, elem := range src { 602 switch elem { 603 case ".": 604 // drop 605 case "..": 606 if len(dst) > 0 { 607 dst = dst[:len(dst)-1] 608 } 609 default: 610 dst = append(dst, elem) 611 } 612 } 613 if last := src[len(src)-1]; last == "." || last == ".." { 614 // Add final slash to the joined path. 615 dst = append(dst, "") 616 } 617 return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/") 618 } 619 620 // IsAbs returns true if the URL is absolute. 621 func (u *URL) IsAbs() bool { 622 return u.Scheme != "" 623 } 624 625 // Parse parses a URL in the context of the receiver. The provided URL 626 // may be relative or absolute. Parse returns nil, err on parse 627 // failure, otherwise its return value is the same as ResolveReference. 628 func (u *URL) Parse(ref string) (*URL, error) { 629 refurl, err := Parse(ref) 630 if err != nil { 631 return nil, err 632 } 633 return u.ResolveReference(refurl), nil 634 } 635 636 // ResolveReference resolves a URI reference to an absolute URI from 637 // an absolute base URI, per RFC 3986 Section 5.2. The URI reference 638 // may be relative or absolute. ResolveReference always returns a new 639 // URL instance, even if the returned URL is identical to either the 640 // base or reference. If ref is an absolute URL, then ResolveReference 641 // ignores base and returns a copy of ref. 642 func (u *URL) ResolveReference(ref *URL) *URL { 643 url := *ref 644 if ref.Scheme == "" { 645 url.Scheme = u.Scheme 646 } 647 if ref.Scheme != "" || ref.Host != "" || ref.User != nil { 648 // The "absoluteURI" or "net_path" cases. 649 url.Path = resolvePath(ref.Path, "") 650 return &url 651 } 652 if ref.Opaque != "" { 653 url.User = nil 654 url.Host = "" 655 url.Path = "" 656 return &url 657 } 658 if ref.Path == "" { 659 if ref.RawQuery == "" { 660 url.RawQuery = u.RawQuery 661 if ref.Fragment == "" { 662 url.Fragment = u.Fragment 663 } 664 } 665 } 666 // The "abs_path" or "rel_path" cases. 667 url.Host = u.Host 668 url.User = u.User 669 url.Path = resolvePath(u.Path, ref.Path) 670 return &url 671 } 672 673 // Query parses RawQuery and returns the corresponding values. 674 func (u *URL) Query() Values { 675 v, _ := ParseQuery(u.RawQuery) 676 return v 677 } 678 679 // RequestURI returns the encoded path?query or opaque?query 680 // string that would be used in an HTTP request for u. 681 func (u *URL) RequestURI() string { 682 result := u.Opaque 683 if result == "" { 684 result = escape(u.Path, encodePath) 685 if result == "" { 686 result = "/" 687 } 688 } else { 689 if strings.HasPrefix(result, "//") { 690 result = u.Scheme + ":" + result 691 } 692 } 693 if u.RawQuery != "" { 694 result += "?" + u.RawQuery 695 } 696 return result 697 }