github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/net/url/url.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package url parses URLs and implements query escaping. 6 // See RFC 3986. 7 package url 8 9 import ( 10 "bytes" 11 "errors" 12 "sort" 13 "strconv" 14 "strings" 15 ) 16 17 // Error reports an error and the operation and URL that caused it. 18 type Error struct { 19 Op string 20 URL string 21 Err error 22 } 23 24 func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() } 25 26 func ishex(c byte) bool { 27 switch { 28 case '0' <= c && c <= '9': 29 return true 30 case 'a' <= c && c <= 'f': 31 return true 32 case 'A' <= c && c <= 'F': 33 return true 34 } 35 return false 36 } 37 38 func unhex(c byte) byte { 39 switch { 40 case '0' <= c && c <= '9': 41 return c - '0' 42 case 'a' <= c && c <= 'f': 43 return c - 'a' + 10 44 case 'A' <= c && c <= 'F': 45 return c - 'A' + 10 46 } 47 return 0 48 } 49 50 type encoding int 51 52 const ( 53 encodePath encoding = 1 + iota 54 encodeUserPassword 55 encodeQueryComponent 56 encodeFragment 57 ) 58 59 type EscapeError string 60 61 func (e EscapeError) Error() string { 62 return "invalid URL escape " + strconv.Quote(string(e)) 63 } 64 65 // Return true if the specified character should be escaped when 66 // appearing in a URL string, according to RFC 3986. 67 func shouldEscape(c byte, mode encoding) bool { 68 // §2.3 Unreserved characters (alphanum) 69 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { 70 return false 71 } 72 73 switch c { 74 case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) 75 return false 76 77 case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) 78 // Different sections of the URL allow a few of 79 // the reserved characters to appear unescaped. 80 switch mode { 81 case encodePath: // §3.3 82 // The RFC allows : @ & = + $ but saves / ; , for assigning 83 // meaning to individual path segments. This package 84 // only manipulates the path as a whole, so we allow those 85 // last two as well. That leaves only ? to escape. 86 return c == '?' 87 88 case encodeUserPassword: // §3.2.1 89 // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in 90 // userinfo, so we must escape only '@', '/', and '?'. 91 // The parsing of userinfo treats ':' as special so we must escape 92 // that too. 93 return c == '@' || c == '/' || c == '?' || c == ':' 94 95 case encodeQueryComponent: // §3.4 96 // The RFC reserves (so we must escape) everything. 97 return true 98 99 case encodeFragment: // §4.1 100 // The RFC text is silent but the grammar allows 101 // everything, so escape nothing. 102 return false 103 } 104 } 105 106 // Everything else must be escaped. 107 return true 108 } 109 110 // QueryUnescape does the inverse transformation of QueryEscape, converting 111 // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if 112 // any % is not followed by two hexadecimal digits. 113 func QueryUnescape(s string) (string, error) { 114 return unescape(s, encodeQueryComponent) 115 } 116 117 // unescape unescapes a string; the mode specifies 118 // which section of the URL string is being unescaped. 119 func unescape(s string, mode encoding) (string, error) { 120 // Count %, check that they're well-formed. 121 n := 0 122 hasPlus := false 123 for i := 0; i < len(s); { 124 switch s[i] { 125 case '%': 126 n++ 127 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { 128 s = s[i:] 129 if len(s) > 3 { 130 s = s[0:3] 131 } 132 return "", EscapeError(s) 133 } 134 i += 3 135 case '+': 136 hasPlus = mode == encodeQueryComponent 137 i++ 138 default: 139 i++ 140 } 141 } 142 143 if n == 0 && !hasPlus { 144 return s, nil 145 } 146 147 t := make([]byte, len(s)-2*n) 148 j := 0 149 for i := 0; i < len(s); { 150 switch s[i] { 151 case '%': 152 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) 153 j++ 154 i += 3 155 case '+': 156 if mode == encodeQueryComponent { 157 t[j] = ' ' 158 } else { 159 t[j] = '+' 160 } 161 j++ 162 i++ 163 default: 164 t[j] = s[i] 165 j++ 166 i++ 167 } 168 } 169 return string(t), nil 170 } 171 172 // QueryEscape escapes the string so it can be safely placed 173 // inside a URL query. 174 func QueryEscape(s string) string { 175 return escape(s, encodeQueryComponent) 176 } 177 178 func escape(s string, mode encoding) string { 179 spaceCount, hexCount := 0, 0 180 for i := 0; i < len(s); i++ { 181 c := s[i] 182 if shouldEscape(c, mode) { 183 if c == ' ' && mode == encodeQueryComponent { 184 spaceCount++ 185 } else { 186 hexCount++ 187 } 188 } 189 } 190 191 if spaceCount == 0 && hexCount == 0 { 192 return s 193 } 194 195 t := make([]byte, len(s)+2*hexCount) 196 j := 0 197 for i := 0; i < len(s); i++ { 198 switch c := s[i]; { 199 case c == ' ' && mode == encodeQueryComponent: 200 t[j] = '+' 201 j++ 202 case shouldEscape(c, mode): 203 t[j] = '%' 204 t[j+1] = "0123456789ABCDEF"[c>>4] 205 t[j+2] = "0123456789ABCDEF"[c&15] 206 j += 3 207 default: 208 t[j] = s[i] 209 j++ 210 } 211 } 212 return string(t) 213 } 214 215 // A URL represents a parsed URL (technically, a URI reference). 216 // The general form represented is: 217 // 218 // scheme://[userinfo@]host/path[?query][#fragment] 219 // 220 // URLs that do not start with a slash after the scheme are interpreted as: 221 // 222 // scheme:opaque[?query][#fragment] 223 // 224 // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/. 225 // A consequence is that it is impossible to tell which slashes in the Path were 226 // slashes in the raw URL and which were %2f. This distinction is rarely important, 227 // but when it is a client must use other routines to parse the raw URL or construct 228 // the parsed URL. For example, an HTTP server can consult req.RequestURI, and 229 // an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"} 230 // instead of URL{Host: "example.com", Path: "/Go/"}. 231 type URL struct { 232 Scheme string 233 Opaque string // encoded opaque data 234 User *Userinfo // username and password information 235 Host string // host or host:port 236 Path string 237 RawQuery string // encoded query values, without '?' 238 Fragment string // fragment for references, without '#' 239 } 240 241 // User returns a Userinfo containing the provided username 242 // and no password set. 243 func User(username string) *Userinfo { 244 return &Userinfo{username, "", false} 245 } 246 247 // UserPassword returns a Userinfo containing the provided username 248 // and password. 249 // This functionality should only be used with legacy web sites. 250 // RFC 2396 warns that interpreting Userinfo this way 251 // ``is NOT RECOMMENDED, because the passing of authentication 252 // information in clear text (such as URI) has proven to be a 253 // security risk in almost every case where it has been used.'' 254 func UserPassword(username, password string) *Userinfo { 255 return &Userinfo{username, password, true} 256 } 257 258 // The Userinfo type is an immutable encapsulation of username and 259 // password details for a URL. An existing Userinfo value is guaranteed 260 // to have a username set (potentially empty, as allowed by RFC 2396), 261 // and optionally a password. 262 type Userinfo struct { 263 username string 264 password string 265 passwordSet bool 266 } 267 268 // Username returns the username. 269 func (u *Userinfo) Username() string { 270 return u.username 271 } 272 273 // Password returns the password in case it is set, and whether it is set. 274 func (u *Userinfo) Password() (string, bool) { 275 if u.passwordSet { 276 return u.password, true 277 } 278 return "", false 279 } 280 281 // String returns the encoded userinfo information in the standard form 282 // of "username[:password]". 283 func (u *Userinfo) String() string { 284 s := escape(u.username, encodeUserPassword) 285 if u.passwordSet { 286 s += ":" + escape(u.password, encodeUserPassword) 287 } 288 return s 289 } 290 291 // Maybe rawurl is of the form scheme:path. 292 // (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) 293 // If so, return scheme, path; else return "", rawurl. 294 func getscheme(rawurl string) (scheme, path string, err error) { 295 for i := 0; i < len(rawurl); i++ { 296 c := rawurl[i] 297 switch { 298 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': 299 // do nothing 300 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': 301 if i == 0 { 302 return "", rawurl, nil 303 } 304 case c == ':': 305 if i == 0 { 306 return "", "", errors.New("missing protocol scheme") 307 } 308 return rawurl[0:i], rawurl[i+1:], nil 309 default: 310 // we have encountered an invalid character, 311 // so there is no valid scheme 312 return "", rawurl, nil 313 } 314 } 315 return "", rawurl, nil 316 } 317 318 // Maybe s is of the form t c u. 319 // If so, return t, c u (or t, u if cutc == true). 320 // If not, return s, "". 321 func split(s string, c string, cutc bool) (string, string) { 322 i := strings.Index(s, c) 323 if i < 0 { 324 return s, "" 325 } 326 if cutc { 327 return s[0:i], s[i+len(c):] 328 } 329 return s[0:i], s[i:] 330 } 331 332 // Parse parses rawurl into a URL structure. 333 // The rawurl may be relative or absolute. 334 func Parse(rawurl string) (url *URL, err error) { 335 // Cut off #frag 336 u, frag := split(rawurl, "#", true) 337 if url, err = parse(u, false); err != nil { 338 return nil, err 339 } 340 if frag == "" { 341 return url, nil 342 } 343 if url.Fragment, err = unescape(frag, encodeFragment); err != nil { 344 return nil, &Error{"parse", rawurl, err} 345 } 346 return url, nil 347 } 348 349 // ParseRequestURI parses rawurl into a URL structure. It assumes that 350 // rawurl was received in an HTTP request, so the rawurl is interpreted 351 // only as an absolute URI or an absolute path. 352 // The string rawurl is assumed not to have a #fragment suffix. 353 // (Web browsers strip #fragment before sending the URL to a web server.) 354 func ParseRequestURI(rawurl string) (url *URL, err error) { 355 return parse(rawurl, true) 356 } 357 358 // parse parses a URL from a string in one of two contexts. If 359 // viaRequest is true, the URL is assumed to have arrived via an HTTP request, 360 // in which case only absolute URLs or path-absolute relative URLs are allowed. 361 // If viaRequest is false, all forms of relative URLs are allowed. 362 func parse(rawurl string, viaRequest bool) (url *URL, err error) { 363 var rest string 364 365 if rawurl == "" && viaRequest { 366 err = errors.New("empty url") 367 goto Error 368 } 369 url = new(URL) 370 371 if rawurl == "*" { 372 url.Path = "*" 373 return 374 } 375 376 // Split off possible leading "http:", "mailto:", etc. 377 // Cannot contain escaped characters. 378 if url.Scheme, rest, err = getscheme(rawurl); err != nil { 379 goto Error 380 } 381 url.Scheme = strings.ToLower(url.Scheme) 382 383 rest, url.RawQuery = split(rest, "?", true) 384 385 if !strings.HasPrefix(rest, "/") { 386 if url.Scheme != "" { 387 // We consider rootless paths per RFC 3986 as opaque. 388 url.Opaque = rest 389 return url, nil 390 } 391 if viaRequest { 392 err = errors.New("invalid URI for request") 393 goto Error 394 } 395 } 396 397 if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") { 398 var authority string 399 authority, rest = split(rest[2:], "/", false) 400 url.User, url.Host, err = parseAuthority(authority) 401 if err != nil { 402 goto Error 403 } 404 if strings.Contains(url.Host, "%") { 405 err = errors.New("hexadecimal escape in host") 406 goto Error 407 } 408 } 409 if url.Path, err = unescape(rest, encodePath); err != nil { 410 goto Error 411 } 412 return url, nil 413 414 Error: 415 return nil, &Error{"parse", rawurl, err} 416 } 417 418 func parseAuthority(authority string) (user *Userinfo, host string, err error) { 419 i := strings.LastIndex(authority, "@") 420 if i < 0 { 421 host = authority 422 return 423 } 424 userinfo, host := authority[:i], authority[i+1:] 425 if strings.Index(userinfo, ":") < 0 { 426 if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil { 427 return 428 } 429 user = User(userinfo) 430 } else { 431 username, password := split(userinfo, ":", true) 432 if username, err = unescape(username, encodeUserPassword); err != nil { 433 return 434 } 435 if password, err = unescape(password, encodeUserPassword); err != nil { 436 return 437 } 438 user = UserPassword(username, password) 439 } 440 return 441 } 442 443 // String reassembles the URL into a valid URL string. 444 // The general form of the result is one of: 445 // 446 // scheme:opaque 447 // scheme://userinfo@host/path?query#fragment 448 // 449 // If u.Opaque is non-empty, String uses the first form; 450 // otherwise it uses the second form. 451 // 452 // In the second form, the following rules apply: 453 // - if u.Scheme is empty, scheme: is omitted. 454 // - if u.User is nil, userinfo@ is omitted. 455 // - if u.Host is empty, host/ is omitted. 456 // - if u.Scheme and u.Host are empty and u.User is nil, 457 // the entire scheme://userinfo@host/ is omitted. 458 // - if u.Host is non-empty and u.Path begins with a /, 459 // the form host/path does not add its own /. 460 // - if u.RawQuery is empty, ?query is omitted. 461 // - if u.Fragment is empty, #fragment is omitted. 462 func (u *URL) String() string { 463 var buf bytes.Buffer 464 if u.Scheme != "" { 465 buf.WriteString(u.Scheme) 466 buf.WriteByte(':') 467 } 468 if u.Opaque != "" { 469 buf.WriteString(u.Opaque) 470 } else { 471 if u.Scheme != "" || u.Host != "" || u.User != nil { 472 buf.WriteString("//") 473 if ui := u.User; ui != nil { 474 buf.WriteString(ui.String()) 475 buf.WriteByte('@') 476 } 477 if h := u.Host; h != "" { 478 buf.WriteString(h) 479 } 480 } 481 if u.Path != "" && u.Path[0] != '/' && u.Host != "" { 482 buf.WriteByte('/') 483 } 484 buf.WriteString(escape(u.Path, encodePath)) 485 } 486 if u.RawQuery != "" { 487 buf.WriteByte('?') 488 buf.WriteString(u.RawQuery) 489 } 490 if u.Fragment != "" { 491 buf.WriteByte('#') 492 buf.WriteString(escape(u.Fragment, encodeFragment)) 493 } 494 return buf.String() 495 } 496 497 // Values maps a string key to a list of values. 498 // It is typically used for query parameters and form values. 499 // Unlike in the http.Header map, the keys in a Values map 500 // are case-sensitive. 501 type Values map[string][]string 502 503 // Get gets the first value associated with the given key. 504 // If there are no values associated with the key, Get returns 505 // the empty string. To access multiple values, use the map 506 // directly. 507 func (v Values) Get(key string) string { 508 if v == nil { 509 return "" 510 } 511 vs, ok := v[key] 512 if !ok || len(vs) == 0 { 513 return "" 514 } 515 return vs[0] 516 } 517 518 // Set sets the key to value. It replaces any existing 519 // values. 520 func (v Values) Set(key, value string) { 521 v[key] = []string{value} 522 } 523 524 // Add adds the value to key. It appends to any existing 525 // values associated with key. 526 func (v Values) Add(key, value string) { 527 v[key] = append(v[key], value) 528 } 529 530 // Del deletes the values associated with key. 531 func (v Values) Del(key string) { 532 delete(v, key) 533 } 534 535 // ParseQuery parses the URL-encoded query string and returns 536 // a map listing the values specified for each key. 537 // ParseQuery always returns a non-nil map containing all the 538 // valid query parameters found; err describes the first decoding error 539 // encountered, if any. 540 func ParseQuery(query string) (m Values, err error) { 541 m = make(Values) 542 err = parseQuery(m, query) 543 return 544 } 545 546 func parseQuery(m Values, query string) (err error) { 547 for query != "" { 548 key := query 549 if i := strings.IndexAny(key, "&;"); i >= 0 { 550 key, query = key[:i], key[i+1:] 551 } else { 552 query = "" 553 } 554 if key == "" { 555 continue 556 } 557 value := "" 558 if i := strings.Index(key, "="); i >= 0 { 559 key, value = key[:i], key[i+1:] 560 } 561 key, err1 := QueryUnescape(key) 562 if err1 != nil { 563 if err == nil { 564 err = err1 565 } 566 continue 567 } 568 value, err1 = QueryUnescape(value) 569 if err1 != nil { 570 if err == nil { 571 err = err1 572 } 573 continue 574 } 575 m[key] = append(m[key], value) 576 } 577 return err 578 } 579 580 // Encode encodes the values into ``URL encoded'' form 581 // ("bar=baz&foo=quux") sorted by key. 582 func (v Values) Encode() string { 583 if v == nil { 584 return "" 585 } 586 var buf bytes.Buffer 587 keys := make([]string, 0, len(v)) 588 for k := range v { 589 keys = append(keys, k) 590 } 591 sort.Strings(keys) 592 for _, k := range keys { 593 vs := v[k] 594 prefix := QueryEscape(k) + "=" 595 for _, v := range vs { 596 if buf.Len() > 0 { 597 buf.WriteByte('&') 598 } 599 buf.WriteString(prefix) 600 buf.WriteString(QueryEscape(v)) 601 } 602 } 603 return buf.String() 604 } 605 606 // resolvePath applies special path segments from refs and applies 607 // them to base, per RFC 3986. 608 func resolvePath(base, ref string) string { 609 var full string 610 if ref == "" { 611 full = base 612 } else if ref[0] != '/' { 613 i := strings.LastIndex(base, "/") 614 full = base[:i+1] + ref 615 } else { 616 full = ref 617 } 618 if full == "" { 619 return "" 620 } 621 var dst []string 622 src := strings.Split(full, "/") 623 for _, elem := range src { 624 switch elem { 625 case ".": 626 // drop 627 case "..": 628 if len(dst) > 0 { 629 dst = dst[:len(dst)-1] 630 } 631 default: 632 dst = append(dst, elem) 633 } 634 } 635 if last := src[len(src)-1]; last == "." || last == ".." { 636 // Add final slash to the joined path. 637 dst = append(dst, "") 638 } 639 return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/") 640 } 641 642 // IsAbs returns true if the URL is absolute. 643 func (u *URL) IsAbs() bool { 644 return u.Scheme != "" 645 } 646 647 // Parse parses a URL in the context of the receiver. The provided URL 648 // may be relative or absolute. Parse returns nil, err on parse 649 // failure, otherwise its return value is the same as ResolveReference. 650 func (u *URL) Parse(ref string) (*URL, error) { 651 refurl, err := Parse(ref) 652 if err != nil { 653 return nil, err 654 } 655 return u.ResolveReference(refurl), nil 656 } 657 658 // ResolveReference resolves a URI reference to an absolute URI from 659 // an absolute base URI, per RFC 3986 Section 5.2. The URI reference 660 // may be relative or absolute. ResolveReference always returns a new 661 // URL instance, even if the returned URL is identical to either the 662 // base or reference. If ref is an absolute URL, then ResolveReference 663 // ignores base and returns a copy of ref. 664 func (u *URL) ResolveReference(ref *URL) *URL { 665 url := *ref 666 if ref.Scheme == "" { 667 url.Scheme = u.Scheme 668 } 669 if ref.Scheme != "" || ref.Host != "" || ref.User != nil { 670 // The "absoluteURI" or "net_path" cases. 671 url.Path = resolvePath(ref.Path, "") 672 return &url 673 } 674 if ref.Opaque != "" { 675 url.User = nil 676 url.Host = "" 677 url.Path = "" 678 return &url 679 } 680 if ref.Path == "" { 681 if ref.RawQuery == "" { 682 url.RawQuery = u.RawQuery 683 if ref.Fragment == "" { 684 url.Fragment = u.Fragment 685 } 686 } 687 } 688 // The "abs_path" or "rel_path" cases. 689 url.Host = u.Host 690 url.User = u.User 691 url.Path = resolvePath(u.Path, ref.Path) 692 return &url 693 } 694 695 // Query parses RawQuery and returns the corresponding values. 696 func (u *URL) Query() Values { 697 v, _ := ParseQuery(u.RawQuery) 698 return v 699 } 700 701 // RequestURI returns the encoded path?query or opaque?query 702 // string that would be used in an HTTP request for u. 703 func (u *URL) RequestURI() string { 704 result := u.Opaque 705 if result == "" { 706 result = escape(u.Path, encodePath) 707 if result == "" { 708 result = "/" 709 } 710 } else { 711 if strings.HasPrefix(result, "//") { 712 result = u.Scheme + ":" + result 713 } 714 } 715 if u.RawQuery != "" { 716 result += "?" + u.RawQuery 717 } 718 return result 719 }