github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/net/url/url.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package url parses URLs and implements query escaping. 6 // See RFC 3986. 7 package url 8 9 import ( 10 "bytes" 11 "errors" 12 "sort" 13 "strconv" 14 "strings" 15 ) 16 17 // Error reports an error and the operation and URL that caused it. 18 type Error struct { 19 Op string 20 URL string 21 Err error 22 } 23 24 func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() } 25 26 func ishex(c byte) bool { 27 switch { 28 case '0' <= c && c <= '9': 29 return true 30 case 'a' <= c && c <= 'f': 31 return true 32 case 'A' <= c && c <= 'F': 33 return true 34 } 35 return false 36 } 37 38 func unhex(c byte) byte { 39 switch { 40 case '0' <= c && c <= '9': 41 return c - '0' 42 case 'a' <= c && c <= 'f': 43 return c - 'a' + 10 44 case 'A' <= c && c <= 'F': 45 return c - 'A' + 10 46 } 47 return 0 48 } 49 50 type encoding int 51 52 const ( 53 encodePath encoding = 1 + iota 54 encodeUserPassword 55 encodeQueryComponent 56 encodeFragment 57 ) 58 59 type EscapeError string 60 61 func (e EscapeError) Error() string { 62 return "invalid URL escape " + strconv.Quote(string(e)) 63 } 64 65 // Return true if the specified character should be escaped when 66 // appearing in a URL string, according to RFC 3986. 67 // When 'all' is true the full range of reserved characters are matched. 68 func shouldEscape(c byte, mode encoding) bool { 69 // §2.3 Unreserved characters (alphanum) 70 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { 71 return false 72 } 73 74 switch c { 75 case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) 76 return false 77 78 case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) 79 // Different sections of the URL allow a few of 80 // the reserved characters to appear unescaped. 81 switch mode { 82 case encodePath: // §3.3 83 // The RFC allows : @ & = + $ but saves / ; , for assigning 84 // meaning to individual path segments. This package 85 // only manipulates the path as a whole, so we allow those 86 // last two as well. That leaves only ? to escape. 87 return c == '?' 88 89 case encodeUserPassword: // §3.2.2 90 // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /. 91 // The parsing of userinfo treats : as special so we must escape that too. 92 return c == '@' || c == '/' || c == ':' 93 94 case encodeQueryComponent: // §3.4 95 // The RFC reserves (so we must escape) everything. 96 return true 97 98 case encodeFragment: // §4.1 99 // The RFC text is silent but the grammar allows 100 // everything, so escape nothing. 101 return false 102 } 103 } 104 105 // Everything else must be escaped. 106 return true 107 } 108 109 // QueryUnescape does the inverse transformation of QueryEscape, converting 110 // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if 111 // any % is not followed by two hexadecimal digits. 112 func QueryUnescape(s string) (string, error) { 113 return unescape(s, encodeQueryComponent) 114 } 115 116 // unescape unescapes a string; the mode specifies 117 // which section of the URL string is being unescaped. 118 func unescape(s string, mode encoding) (string, error) { 119 // Count %, check that they're well-formed. 120 n := 0 121 hasPlus := false 122 for i := 0; i < len(s); { 123 switch s[i] { 124 case '%': 125 n++ 126 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { 127 s = s[i:] 128 if len(s) > 3 { 129 s = s[0:3] 130 } 131 return "", EscapeError(s) 132 } 133 i += 3 134 case '+': 135 hasPlus = mode == encodeQueryComponent 136 i++ 137 default: 138 i++ 139 } 140 } 141 142 if n == 0 && !hasPlus { 143 return s, nil 144 } 145 146 t := make([]byte, len(s)-2*n) 147 j := 0 148 for i := 0; i < len(s); { 149 switch s[i] { 150 case '%': 151 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) 152 j++ 153 i += 3 154 case '+': 155 if mode == encodeQueryComponent { 156 t[j] = ' ' 157 } else { 158 t[j] = '+' 159 } 160 j++ 161 i++ 162 default: 163 t[j] = s[i] 164 j++ 165 i++ 166 } 167 } 168 return string(t), nil 169 } 170 171 // QueryEscape escapes the string so it can be safely placed 172 // inside a URL query. 173 func QueryEscape(s string) string { 174 return escape(s, encodeQueryComponent) 175 } 176 177 func escape(s string, mode encoding) string { 178 spaceCount, hexCount := 0, 0 179 for i := 0; i < len(s); i++ { 180 c := s[i] 181 if shouldEscape(c, mode) { 182 if c == ' ' && mode == encodeQueryComponent { 183 spaceCount++ 184 } else { 185 hexCount++ 186 } 187 } 188 } 189 190 if spaceCount == 0 && hexCount == 0 { 191 return s 192 } 193 194 t := make([]byte, len(s)+2*hexCount) 195 j := 0 196 for i := 0; i < len(s); i++ { 197 switch c := s[i]; { 198 case c == ' ' && mode == encodeQueryComponent: 199 t[j] = '+' 200 j++ 201 case shouldEscape(c, mode): 202 t[j] = '%' 203 t[j+1] = "0123456789ABCDEF"[c>>4] 204 t[j+2] = "0123456789ABCDEF"[c&15] 205 j += 3 206 default: 207 t[j] = s[i] 208 j++ 209 } 210 } 211 return string(t) 212 } 213 214 // A URL represents a parsed URL (technically, a URI reference). 215 // The general form represented is: 216 // 217 // scheme://[userinfo@]host/path[?query][#fragment] 218 // 219 // URLs that do not start with a slash after the scheme are interpreted as: 220 // 221 // scheme:opaque[?query][#fragment] 222 // 223 // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/. 224 // A consequence is that it is impossible to tell which slashes in the Path were 225 // slashes in the raw URL and which were %2f. This distinction is rarely important, 226 // but when it is a client must use other routines to parse the raw URL or construct 227 // the parsed URL. For example, an HTTP server can consult req.RequestURI, and 228 // an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"} 229 // instead of URL{Host: "example.com", Path: "/Go/"}. 230 type URL struct { 231 Scheme string 232 Opaque string // encoded opaque data 233 User *Userinfo // username and password information 234 Host string // host or host:port 235 Path string 236 RawQuery string // encoded query values, without '?' 237 Fragment string // fragment for references, without '#' 238 } 239 240 // User returns a Userinfo containing the provided username 241 // and no password set. 242 func User(username string) *Userinfo { 243 return &Userinfo{username, "", false} 244 } 245 246 // UserPassword returns a Userinfo containing the provided username 247 // and password. 248 // This functionality should only be used with legacy web sites. 249 // RFC 2396 warns that interpreting Userinfo this way 250 // ``is NOT RECOMMENDED, because the passing of authentication 251 // information in clear text (such as URI) has proven to be a 252 // security risk in almost every case where it has been used.'' 253 func UserPassword(username, password string) *Userinfo { 254 return &Userinfo{username, password, true} 255 } 256 257 // The Userinfo type is an immutable encapsulation of username and 258 // password details for a URL. An existing Userinfo value is guaranteed 259 // to have a username set (potentially empty, as allowed by RFC 2396), 260 // and optionally a password. 261 type Userinfo struct { 262 username string 263 password string 264 passwordSet bool 265 } 266 267 // Username returns the username. 268 func (u *Userinfo) Username() string { 269 return u.username 270 } 271 272 // Password returns the password in case it is set, and whether it is set. 273 func (u *Userinfo) Password() (string, bool) { 274 if u.passwordSet { 275 return u.password, true 276 } 277 return "", false 278 } 279 280 // String returns the encoded userinfo information in the standard form 281 // of "username[:password]". 282 func (u *Userinfo) String() string { 283 s := escape(u.username, encodeUserPassword) 284 if u.passwordSet { 285 s += ":" + escape(u.password, encodeUserPassword) 286 } 287 return s 288 } 289 290 // Maybe rawurl is of the form scheme:path. 291 // (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) 292 // If so, return scheme, path; else return "", rawurl. 293 func getscheme(rawurl string) (scheme, path string, err error) { 294 for i := 0; i < len(rawurl); i++ { 295 c := rawurl[i] 296 switch { 297 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': 298 // do nothing 299 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': 300 if i == 0 { 301 return "", rawurl, nil 302 } 303 case c == ':': 304 if i == 0 { 305 return "", "", errors.New("missing protocol scheme") 306 } 307 return rawurl[0:i], rawurl[i+1:], nil 308 default: 309 // we have encountered an invalid character, 310 // so there is no valid scheme 311 return "", rawurl, nil 312 } 313 } 314 return "", rawurl, nil 315 } 316 317 // Maybe s is of the form t c u. 318 // If so, return t, c u (or t, u if cutc == true). 319 // If not, return s, "". 320 func split(s string, c string, cutc bool) (string, string) { 321 i := strings.Index(s, c) 322 if i < 0 { 323 return s, "" 324 } 325 if cutc { 326 return s[0:i], s[i+len(c):] 327 } 328 return s[0:i], s[i:] 329 } 330 331 // Parse parses rawurl into a URL structure. 332 // The rawurl may be relative or absolute. 333 func Parse(rawurl string) (url *URL, err error) { 334 // Cut off #frag 335 u, frag := split(rawurl, "#", true) 336 if url, err = parse(u, false); err != nil { 337 return nil, err 338 } 339 if frag == "" { 340 return url, nil 341 } 342 if url.Fragment, err = unescape(frag, encodeFragment); err != nil { 343 return nil, &Error{"parse", rawurl, err} 344 } 345 return url, nil 346 } 347 348 // ParseRequestURI parses rawurl into a URL structure. It assumes that 349 // rawurl was received in an HTTP request, so the rawurl is interpreted 350 // only as an absolute URI or an absolute path. 351 // The string rawurl is assumed not to have a #fragment suffix. 352 // (Web browsers strip #fragment before sending the URL to a web server.) 353 func ParseRequestURI(rawurl string) (url *URL, err error) { 354 return parse(rawurl, true) 355 } 356 357 // parse parses a URL from a string in one of two contexts. If 358 // viaRequest is true, the URL is assumed to have arrived via an HTTP request, 359 // in which case only absolute URLs or path-absolute relative URLs are allowed. 360 // If viaRequest is false, all forms of relative URLs are allowed. 361 func parse(rawurl string, viaRequest bool) (url *URL, err error) { 362 var rest string 363 364 if rawurl == "" && viaRequest { 365 err = errors.New("empty url") 366 goto Error 367 } 368 url = new(URL) 369 370 if rawurl == "*" { 371 url.Path = "*" 372 return 373 } 374 375 // Split off possible leading "http:", "mailto:", etc. 376 // Cannot contain escaped characters. 377 if url.Scheme, rest, err = getscheme(rawurl); err != nil { 378 goto Error 379 } 380 url.Scheme = strings.ToLower(url.Scheme) 381 382 rest, url.RawQuery = split(rest, "?", true) 383 384 if !strings.HasPrefix(rest, "/") { 385 if url.Scheme != "" { 386 // We consider rootless paths per RFC 3986 as opaque. 387 url.Opaque = rest 388 return url, nil 389 } 390 if viaRequest { 391 err = errors.New("invalid URI for request") 392 goto Error 393 } 394 } 395 396 if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") { 397 var authority string 398 authority, rest = split(rest[2:], "/", false) 399 url.User, url.Host, err = parseAuthority(authority) 400 if err != nil { 401 goto Error 402 } 403 if strings.Contains(url.Host, "%") { 404 err = errors.New("hexadecimal escape in host") 405 goto Error 406 } 407 } 408 if url.Path, err = unescape(rest, encodePath); err != nil { 409 goto Error 410 } 411 return url, nil 412 413 Error: 414 return nil, &Error{"parse", rawurl, err} 415 } 416 417 func parseAuthority(authority string) (user *Userinfo, host string, err error) { 418 i := strings.LastIndex(authority, "@") 419 if i < 0 { 420 host = authority 421 return 422 } 423 userinfo, host := authority[:i], authority[i+1:] 424 if strings.Index(userinfo, ":") < 0 { 425 if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil { 426 return 427 } 428 user = User(userinfo) 429 } else { 430 username, password := split(userinfo, ":", true) 431 if username, err = unescape(username, encodeUserPassword); err != nil { 432 return 433 } 434 if password, err = unescape(password, encodeUserPassword); err != nil { 435 return 436 } 437 user = UserPassword(username, password) 438 } 439 return 440 } 441 442 // String reassembles the URL into a valid URL string. 443 func (u *URL) String() string { 444 var buf bytes.Buffer 445 if u.Scheme != "" { 446 buf.WriteString(u.Scheme) 447 buf.WriteByte(':') 448 } 449 if u.Opaque != "" { 450 buf.WriteString(u.Opaque) 451 } else { 452 if u.Scheme != "" || u.Host != "" || u.User != nil { 453 buf.WriteString("//") 454 if ui := u.User; ui != nil { 455 buf.WriteString(ui.String()) 456 buf.WriteByte('@') 457 } 458 if h := u.Host; h != "" { 459 buf.WriteString(h) 460 } 461 } 462 if u.Path != "" && u.Path[0] != '/' && u.Host != "" { 463 buf.WriteByte('/') 464 } 465 buf.WriteString(escape(u.Path, encodePath)) 466 } 467 if u.RawQuery != "" { 468 buf.WriteByte('?') 469 buf.WriteString(u.RawQuery) 470 } 471 if u.Fragment != "" { 472 buf.WriteByte('#') 473 buf.WriteString(escape(u.Fragment, encodeFragment)) 474 } 475 return buf.String() 476 } 477 478 // Values maps a string key to a list of values. 479 // It is typically used for query parameters and form values. 480 // Unlike in the http.Header map, the keys in a Values map 481 // are case-sensitive. 482 type Values map[string][]string 483 484 // Get gets the first value associated with the given key. 485 // If there are no values associated with the key, Get returns 486 // the empty string. To access multiple values, use the map 487 // directly. 488 func (v Values) Get(key string) string { 489 if v == nil { 490 return "" 491 } 492 vs, ok := v[key] 493 if !ok || len(vs) == 0 { 494 return "" 495 } 496 return vs[0] 497 } 498 499 // Set sets the key to value. It replaces any existing 500 // values. 501 func (v Values) Set(key, value string) { 502 v[key] = []string{value} 503 } 504 505 // Add adds the key to value. It appends to any existing 506 // values associated with key. 507 func (v Values) Add(key, value string) { 508 v[key] = append(v[key], value) 509 } 510 511 // Del deletes the values associated with key. 512 func (v Values) Del(key string) { 513 delete(v, key) 514 } 515 516 // ParseQuery parses the URL-encoded query string and returns 517 // a map listing the values specified for each key. 518 // ParseQuery always returns a non-nil map containing all the 519 // valid query parameters found; err describes the first decoding error 520 // encountered, if any. 521 func ParseQuery(query string) (m Values, err error) { 522 m = make(Values) 523 err = parseQuery(m, query) 524 return 525 } 526 527 func parseQuery(m Values, query string) (err error) { 528 for query != "" { 529 key := query 530 if i := strings.IndexAny(key, "&;"); i >= 0 { 531 key, query = key[:i], key[i+1:] 532 } else { 533 query = "" 534 } 535 if key == "" { 536 continue 537 } 538 value := "" 539 if i := strings.Index(key, "="); i >= 0 { 540 key, value = key[:i], key[i+1:] 541 } 542 key, err1 := QueryUnescape(key) 543 if err1 != nil { 544 if err == nil { 545 err = err1 546 } 547 continue 548 } 549 value, err1 = QueryUnescape(value) 550 if err1 != nil { 551 if err == nil { 552 err = err1 553 } 554 continue 555 } 556 m[key] = append(m[key], value) 557 } 558 return err 559 } 560 561 // Encode encodes the values into ``URL encoded'' form 562 // ("bar=baz&foo=quux") sorted by key. 563 func (v Values) Encode() string { 564 if v == nil { 565 return "" 566 } 567 var buf bytes.Buffer 568 keys := make([]string, 0, len(v)) 569 for k := range v { 570 keys = append(keys, k) 571 } 572 sort.Strings(keys) 573 for _, k := range keys { 574 vs := v[k] 575 prefix := QueryEscape(k) + "=" 576 for _, v := range vs { 577 if buf.Len() > 0 { 578 buf.WriteByte('&') 579 } 580 buf.WriteString(prefix) 581 buf.WriteString(QueryEscape(v)) 582 } 583 } 584 return buf.String() 585 } 586 587 // resolvePath applies special path segments from refs and applies 588 // them to base, per RFC 3986. 589 func resolvePath(base, ref string) string { 590 var full string 591 if ref == "" { 592 full = base 593 } else if ref[0] != '/' { 594 i := strings.LastIndex(base, "/") 595 full = base[:i+1] + ref 596 } else { 597 full = ref 598 } 599 if full == "" { 600 return "" 601 } 602 var dst []string 603 src := strings.Split(full, "/") 604 for _, elem := range src { 605 switch elem { 606 case ".": 607 // drop 608 case "..": 609 if len(dst) > 0 { 610 dst = dst[:len(dst)-1] 611 } 612 default: 613 dst = append(dst, elem) 614 } 615 } 616 if last := src[len(src)-1]; last == "." || last == ".." { 617 // Add final slash to the joined path. 618 dst = append(dst, "") 619 } 620 return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/") 621 } 622 623 // IsAbs returns true if the URL is absolute. 624 func (u *URL) IsAbs() bool { 625 return u.Scheme != "" 626 } 627 628 // Parse parses a URL in the context of the receiver. The provided URL 629 // may be relative or absolute. Parse returns nil, err on parse 630 // failure, otherwise its return value is the same as ResolveReference. 631 func (u *URL) Parse(ref string) (*URL, error) { 632 refurl, err := Parse(ref) 633 if err != nil { 634 return nil, err 635 } 636 return u.ResolveReference(refurl), nil 637 } 638 639 // ResolveReference resolves a URI reference to an absolute URI from 640 // an absolute base URI, per RFC 3986 Section 5.2. The URI reference 641 // may be relative or absolute. ResolveReference always returns a new 642 // URL instance, even if the returned URL is identical to either the 643 // base or reference. If ref is an absolute URL, then ResolveReference 644 // ignores base and returns a copy of ref. 645 func (u *URL) ResolveReference(ref *URL) *URL { 646 url := *ref 647 if ref.Scheme == "" { 648 url.Scheme = u.Scheme 649 } 650 if ref.Scheme != "" || ref.Host != "" || ref.User != nil { 651 // The "absoluteURI" or "net_path" cases. 652 url.Path = resolvePath(ref.Path, "") 653 return &url 654 } 655 if ref.Opaque != "" { 656 url.User = nil 657 url.Host = "" 658 url.Path = "" 659 return &url 660 } 661 if ref.Path == "" { 662 if ref.RawQuery == "" { 663 url.RawQuery = u.RawQuery 664 if ref.Fragment == "" { 665 url.Fragment = u.Fragment 666 } 667 } 668 } 669 // The "abs_path" or "rel_path" cases. 670 url.Host = u.Host 671 url.User = u.User 672 url.Path = resolvePath(u.Path, ref.Path) 673 return &url 674 } 675 676 // Query parses RawQuery and returns the corresponding values. 677 func (u *URL) Query() Values { 678 v, _ := ParseQuery(u.RawQuery) 679 return v 680 } 681 682 // RequestURI returns the encoded path?query or opaque?query 683 // string that would be used in an HTTP request for u. 684 func (u *URL) RequestURI() string { 685 result := u.Opaque 686 if result == "" { 687 result = escape(u.Path, encodePath) 688 if result == "" { 689 result = "/" 690 } 691 } else { 692 if strings.HasPrefix(result, "//") { 693 result = u.Scheme + ":" + result 694 } 695 } 696 if u.RawQuery != "" { 697 result += "?" + u.RawQuery 698 } 699 return result 700 }