github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/net/url/url.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package url parses URLs and implements query escaping.
     6  // See RFC 3986.
     7  package url
     8  
     9  import (
    10  	"bytes"
    11  	"errors"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  )
    16  
    17  // Error reports an error and the operation and URL that caused it.
    18  type Error struct {
    19  	Op  string
    20  	URL string
    21  	Err error
    22  }
    23  
    24  func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
    25  
    26  func ishex(c byte) bool {
    27  	switch {
    28  	case '0' <= c && c <= '9':
    29  		return true
    30  	case 'a' <= c && c <= 'f':
    31  		return true
    32  	case 'A' <= c && c <= 'F':
    33  		return true
    34  	}
    35  	return false
    36  }
    37  
    38  func unhex(c byte) byte {
    39  	switch {
    40  	case '0' <= c && c <= '9':
    41  		return c - '0'
    42  	case 'a' <= c && c <= 'f':
    43  		return c - 'a' + 10
    44  	case 'A' <= c && c <= 'F':
    45  		return c - 'A' + 10
    46  	}
    47  	return 0
    48  }
    49  
    50  type encoding int
    51  
    52  const (
    53  	encodePath encoding = 1 + iota
    54  	encodeUserPassword
    55  	encodeQueryComponent
    56  	encodeFragment
    57  )
    58  
    59  type EscapeError string
    60  
    61  func (e EscapeError) Error() string {
    62  	return "invalid URL escape " + strconv.Quote(string(e))
    63  }
    64  
    65  // Return true if the specified character should be escaped when
    66  // appearing in a URL string, according to RFC 3986.
    67  // When 'all' is true the full range of reserved characters are matched.
    68  func shouldEscape(c byte, mode encoding) bool {
    69  	// §2.3 Unreserved characters (alphanum)
    70  	if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
    71  		return false
    72  	}
    73  
    74  	switch c {
    75  	case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
    76  		return false
    77  
    78  	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
    79  		// Different sections of the URL allow a few of
    80  		// the reserved characters to appear unescaped.
    81  		switch mode {
    82  		case encodePath: // §3.3
    83  			// The RFC allows : @ & = + $ but saves / ; , for assigning
    84  			// meaning to individual path segments. This package
    85  			// only manipulates the path as a whole, so we allow those
    86  			// last two as well. That leaves only ? to escape.
    87  			return c == '?'
    88  
    89  		case encodeUserPassword: // §3.2.2
    90  			// The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
    91  			// The parsing of userinfo treats : as special so we must escape that too.
    92  			return c == '@' || c == '/' || c == ':'
    93  
    94  		case encodeQueryComponent: // §3.4
    95  			// The RFC reserves (so we must escape) everything.
    96  			return true
    97  
    98  		case encodeFragment: // §4.1
    99  			// The RFC text is silent but the grammar allows
   100  			// everything, so escape nothing.
   101  			return false
   102  		}
   103  	}
   104  
   105  	// Everything else must be escaped.
   106  	return true
   107  }
   108  
   109  // QueryUnescape does the inverse transformation of QueryEscape, converting
   110  // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
   111  // any % is not followed by two hexadecimal digits.
   112  func QueryUnescape(s string) (string, error) {
   113  	return unescape(s, encodeQueryComponent)
   114  }
   115  
   116  // unescape unescapes a string; the mode specifies
   117  // which section of the URL string is being unescaped.
   118  func unescape(s string, mode encoding) (string, error) {
   119  	// Count %, check that they're well-formed.
   120  	n := 0
   121  	hasPlus := false
   122  	for i := 0; i < len(s); {
   123  		switch s[i] {
   124  		case '%':
   125  			n++
   126  			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   127  				s = s[i:]
   128  				if len(s) > 3 {
   129  					s = s[0:3]
   130  				}
   131  				return "", EscapeError(s)
   132  			}
   133  			i += 3
   134  		case '+':
   135  			hasPlus = mode == encodeQueryComponent
   136  			i++
   137  		default:
   138  			i++
   139  		}
   140  	}
   141  
   142  	if n == 0 && !hasPlus {
   143  		return s, nil
   144  	}
   145  
   146  	t := make([]byte, len(s)-2*n)
   147  	j := 0
   148  	for i := 0; i < len(s); {
   149  		switch s[i] {
   150  		case '%':
   151  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   152  			j++
   153  			i += 3
   154  		case '+':
   155  			if mode == encodeQueryComponent {
   156  				t[j] = ' '
   157  			} else {
   158  				t[j] = '+'
   159  			}
   160  			j++
   161  			i++
   162  		default:
   163  			t[j] = s[i]
   164  			j++
   165  			i++
   166  		}
   167  	}
   168  	return string(t), nil
   169  }
   170  
   171  // QueryEscape escapes the string so it can be safely placed
   172  // inside a URL query.
   173  func QueryEscape(s string) string {
   174  	return escape(s, encodeQueryComponent)
   175  }
   176  
   177  func escape(s string, mode encoding) string {
   178  	spaceCount, hexCount := 0, 0
   179  	for i := 0; i < len(s); i++ {
   180  		c := s[i]
   181  		if shouldEscape(c, mode) {
   182  			if c == ' ' && mode == encodeQueryComponent {
   183  				spaceCount++
   184  			} else {
   185  				hexCount++
   186  			}
   187  		}
   188  	}
   189  
   190  	if spaceCount == 0 && hexCount == 0 {
   191  		return s
   192  	}
   193  
   194  	t := make([]byte, len(s)+2*hexCount)
   195  	j := 0
   196  	for i := 0; i < len(s); i++ {
   197  		switch c := s[i]; {
   198  		case c == ' ' && mode == encodeQueryComponent:
   199  			t[j] = '+'
   200  			j++
   201  		case shouldEscape(c, mode):
   202  			t[j] = '%'
   203  			t[j+1] = "0123456789ABCDEF"[c>>4]
   204  			t[j+2] = "0123456789ABCDEF"[c&15]
   205  			j += 3
   206  		default:
   207  			t[j] = s[i]
   208  			j++
   209  		}
   210  	}
   211  	return string(t)
   212  }
   213  
// A URL represents a parsed URL (technically, a URI reference).
// The general form represented is:
//
//	scheme://[userinfo@]host/path[?query][#fragment]
//
// URLs that do not start with a slash after the scheme are interpreted as:
//
//	scheme:opaque[?query][#fragment]
//
// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
// A consequence is that it is impossible to tell which slashes in the Path were
// slashes in the raw URL and which were %2f. This distinction is rarely important,
// but when it is a client must use other routines to parse the raw URL or construct
// the parsed URL. For example, an HTTP server can consult req.RequestURI, and
// an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"}
// instead of URL{Host: "example.com", Path: "/Go/"}.
type URL struct {
	Scheme   string    // scheme, without the trailing ':'
	Opaque   string    // encoded opaque data
	User     *Userinfo // username and password information
	Host     string    // host or host:port
	Path     string    // path, in decoded form
	RawQuery string    // encoded query values, without '?'
	Fragment string    // fragment for references, without '#'
}
   239  
   240  // User returns a Userinfo containing the provided username
   241  // and no password set.
   242  func User(username string) *Userinfo {
   243  	return &Userinfo{username, "", false}
   244  }
   245  
   246  // UserPassword returns a Userinfo containing the provided username
   247  // and password.
   248  // This functionality should only be used with legacy web sites.
   249  // RFC 2396 warns that interpreting Userinfo this way
   250  // ``is NOT RECOMMENDED, because the passing of authentication
   251  // information in clear text (such as URI) has proven to be a
   252  // security risk in almost every case where it has been used.''
   253  func UserPassword(username, password string) *Userinfo {
   254  	return &Userinfo{username, password, true}
   255  }
   256  
   257  // The Userinfo type is an immutable encapsulation of username and
   258  // password details for a URL. An existing Userinfo value is guaranteed
   259  // to have a username set (potentially empty, as allowed by RFC 2396),
   260  // and optionally a password.
   261  type Userinfo struct {
   262  	username    string
   263  	password    string
   264  	passwordSet bool
   265  }
   266  
   267  // Username returns the username.
   268  func (u *Userinfo) Username() string {
   269  	return u.username
   270  }
   271  
   272  // Password returns the password in case it is set, and whether it is set.
   273  func (u *Userinfo) Password() (string, bool) {
   274  	if u.passwordSet {
   275  		return u.password, true
   276  	}
   277  	return "", false
   278  }
   279  
   280  // String returns the encoded userinfo information in the standard form
   281  // of "username[:password]".
   282  func (u *Userinfo) String() string {
   283  	s := escape(u.username, encodeUserPassword)
   284  	if u.passwordSet {
   285  		s += ":" + escape(u.password, encodeUserPassword)
   286  	}
   287  	return s
   288  }
   289  
   290  // Maybe rawurl is of the form scheme:path.
   291  // (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
   292  // If so, return scheme, path; else return "", rawurl.
   293  func getscheme(rawurl string) (scheme, path string, err error) {
   294  	for i := 0; i < len(rawurl); i++ {
   295  		c := rawurl[i]
   296  		switch {
   297  		case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
   298  		// do nothing
   299  		case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
   300  			if i == 0 {
   301  				return "", rawurl, nil
   302  			}
   303  		case c == ':':
   304  			if i == 0 {
   305  				return "", "", errors.New("missing protocol scheme")
   306  			}
   307  			return rawurl[0:i], rawurl[i+1:], nil
   308  		default:
   309  			// we have encountered an invalid character,
   310  			// so there is no valid scheme
   311  			return "", rawurl, nil
   312  		}
   313  	}
   314  	return "", rawurl, nil
   315  }
   316  
   317  // Maybe s is of the form t c u.
   318  // If so, return t, c u (or t, u if cutc == true).
   319  // If not, return s, "".
   320  func split(s string, c string, cutc bool) (string, string) {
   321  	i := strings.Index(s, c)
   322  	if i < 0 {
   323  		return s, ""
   324  	}
   325  	if cutc {
   326  		return s[0:i], s[i+len(c):]
   327  	}
   328  	return s[0:i], s[i:]
   329  }
   330  
   331  // Parse parses rawurl into a URL structure.
   332  // The rawurl may be relative or absolute.
   333  func Parse(rawurl string) (url *URL, err error) {
   334  	// Cut off #frag
   335  	u, frag := split(rawurl, "#", true)
   336  	if url, err = parse(u, false); err != nil {
   337  		return nil, err
   338  	}
   339  	if frag == "" {
   340  		return url, nil
   341  	}
   342  	if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
   343  		return nil, &Error{"parse", rawurl, err}
   344  	}
   345  	return url, nil
   346  }
   347  
   348  // ParseRequestURI parses rawurl into a URL structure.  It assumes that
   349  // rawurl was received in an HTTP request, so the rawurl is interpreted
   350  // only as an absolute URI or an absolute path.
   351  // The string rawurl is assumed not to have a #fragment suffix.
   352  // (Web browsers strip #fragment before sending the URL to a web server.)
   353  func ParseRequestURI(rawurl string) (url *URL, err error) {
   354  	return parse(rawurl, true)
   355  }
   356  
   357  // parse parses a URL from a string in one of two contexts.  If
   358  // viaRequest is true, the URL is assumed to have arrived via an HTTP request,
   359  // in which case only absolute URLs or path-absolute relative URLs are allowed.
   360  // If viaRequest is false, all forms of relative URLs are allowed.
   361  func parse(rawurl string, viaRequest bool) (url *URL, err error) {
   362  	var rest string
   363  
   364  	if rawurl == "" && viaRequest {
   365  		err = errors.New("empty url")
   366  		goto Error
   367  	}
   368  	url = new(URL)
   369  
   370  	if rawurl == "*" {
   371  		url.Path = "*"
   372  		return
   373  	}
   374  
   375  	// Split off possible leading "http:", "mailto:", etc.
   376  	// Cannot contain escaped characters.
   377  	if url.Scheme, rest, err = getscheme(rawurl); err != nil {
   378  		goto Error
   379  	}
   380  	url.Scheme = strings.ToLower(url.Scheme)
   381  
   382  	rest, url.RawQuery = split(rest, "?", true)
   383  
   384  	if !strings.HasPrefix(rest, "/") {
   385  		if url.Scheme != "" {
   386  			// We consider rootless paths per RFC 3986 as opaque.
   387  			url.Opaque = rest
   388  			return url, nil
   389  		}
   390  		if viaRequest {
   391  			err = errors.New("invalid URI for request")
   392  			goto Error
   393  		}
   394  	}
   395  
   396  	if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
   397  		var authority string
   398  		authority, rest = split(rest[2:], "/", false)
   399  		url.User, url.Host, err = parseAuthority(authority)
   400  		if err != nil {
   401  			goto Error
   402  		}
   403  		if strings.Contains(url.Host, "%") {
   404  			err = errors.New("hexadecimal escape in host")
   405  			goto Error
   406  		}
   407  	}
   408  	if url.Path, err = unescape(rest, encodePath); err != nil {
   409  		goto Error
   410  	}
   411  	return url, nil
   412  
   413  Error:
   414  	return nil, &Error{"parse", rawurl, err}
   415  }
   416  
   417  func parseAuthority(authority string) (user *Userinfo, host string, err error) {
   418  	i := strings.LastIndex(authority, "@")
   419  	if i < 0 {
   420  		host = authority
   421  		return
   422  	}
   423  	userinfo, host := authority[:i], authority[i+1:]
   424  	if strings.Index(userinfo, ":") < 0 {
   425  		if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
   426  			return
   427  		}
   428  		user = User(userinfo)
   429  	} else {
   430  		username, password := split(userinfo, ":", true)
   431  		if username, err = unescape(username, encodeUserPassword); err != nil {
   432  			return
   433  		}
   434  		if password, err = unescape(password, encodeUserPassword); err != nil {
   435  			return
   436  		}
   437  		user = UserPassword(username, password)
   438  	}
   439  	return
   440  }
   441  
   442  // String reassembles the URL into a valid URL string.
   443  func (u *URL) String() string {
   444  	var buf bytes.Buffer
   445  	if u.Scheme != "" {
   446  		buf.WriteString(u.Scheme)
   447  		buf.WriteByte(':')
   448  	}
   449  	if u.Opaque != "" {
   450  		buf.WriteString(u.Opaque)
   451  	} else {
   452  		if u.Scheme != "" || u.Host != "" || u.User != nil {
   453  			buf.WriteString("//")
   454  			if ui := u.User; ui != nil {
   455  				buf.WriteString(ui.String())
   456  				buf.WriteByte('@')
   457  			}
   458  			if h := u.Host; h != "" {
   459  				buf.WriteString(h)
   460  			}
   461  		}
   462  		if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
   463  			buf.WriteByte('/')
   464  		}
   465  		buf.WriteString(escape(u.Path, encodePath))
   466  	}
   467  	if u.RawQuery != "" {
   468  		buf.WriteByte('?')
   469  		buf.WriteString(u.RawQuery)
   470  	}
   471  	if u.Fragment != "" {
   472  		buf.WriteByte('#')
   473  		buf.WriteString(escape(u.Fragment, encodeFragment))
   474  	}
   475  	return buf.String()
   476  }
   477  
   478  // Values maps a string key to a list of values.
   479  // It is typically used for query parameters and form values.
   480  // Unlike in the http.Header map, the keys in a Values map
   481  // are case-sensitive.
   482  type Values map[string][]string
   483  
   484  // Get gets the first value associated with the given key.
   485  // If there are no values associated with the key, Get returns
   486  // the empty string. To access multiple values, use the map
   487  // directly.
   488  func (v Values) Get(key string) string {
   489  	if v == nil {
   490  		return ""
   491  	}
   492  	vs, ok := v[key]
   493  	if !ok || len(vs) == 0 {
   494  		return ""
   495  	}
   496  	return vs[0]
   497  }
   498  
   499  // Set sets the key to value. It replaces any existing
   500  // values.
   501  func (v Values) Set(key, value string) {
   502  	v[key] = []string{value}
   503  }
   504  
   505  // Add adds the key to value. It appends to any existing
   506  // values associated with key.
   507  func (v Values) Add(key, value string) {
   508  	v[key] = append(v[key], value)
   509  }
   510  
   511  // Del deletes the values associated with key.
   512  func (v Values) Del(key string) {
   513  	delete(v, key)
   514  }
   515  
   516  // ParseQuery parses the URL-encoded query string and returns
   517  // a map listing the values specified for each key.
   518  // ParseQuery always returns a non-nil map containing all the
   519  // valid query parameters found; err describes the first decoding error
   520  // encountered, if any.
   521  func ParseQuery(query string) (m Values, err error) {
   522  	m = make(Values)
   523  	err = parseQuery(m, query)
   524  	return
   525  }
   526  
   527  func parseQuery(m Values, query string) (err error) {
   528  	for query != "" {
   529  		key := query
   530  		if i := strings.IndexAny(key, "&;"); i >= 0 {
   531  			key, query = key[:i], key[i+1:]
   532  		} else {
   533  			query = ""
   534  		}
   535  		if key == "" {
   536  			continue
   537  		}
   538  		value := ""
   539  		if i := strings.Index(key, "="); i >= 0 {
   540  			key, value = key[:i], key[i+1:]
   541  		}
   542  		key, err1 := QueryUnescape(key)
   543  		if err1 != nil {
   544  			if err == nil {
   545  				err = err1
   546  			}
   547  			continue
   548  		}
   549  		value, err1 = QueryUnescape(value)
   550  		if err1 != nil {
   551  			if err == nil {
   552  				err = err1
   553  			}
   554  			continue
   555  		}
   556  		m[key] = append(m[key], value)
   557  	}
   558  	return err
   559  }
   560  
   561  // Encode encodes the values into ``URL encoded'' form
   562  // ("bar=baz&foo=quux") sorted by key.
   563  func (v Values) Encode() string {
   564  	if v == nil {
   565  		return ""
   566  	}
   567  	var buf bytes.Buffer
   568  	keys := make([]string, 0, len(v))
   569  	for k := range v {
   570  		keys = append(keys, k)
   571  	}
   572  	sort.Strings(keys)
   573  	for _, k := range keys {
   574  		vs := v[k]
   575  		prefix := QueryEscape(k) + "="
   576  		for _, v := range vs {
   577  			if buf.Len() > 0 {
   578  				buf.WriteByte('&')
   579  			}
   580  			buf.WriteString(prefix)
   581  			buf.WriteString(QueryEscape(v))
   582  		}
   583  	}
   584  	return buf.String()
   585  }
   586  
   587  // resolvePath applies special path segments from refs and applies
   588  // them to base, per RFC 3986.
   589  func resolvePath(base, ref string) string {
   590  	var full string
   591  	if ref == "" {
   592  		full = base
   593  	} else if ref[0] != '/' {
   594  		i := strings.LastIndex(base, "/")
   595  		full = base[:i+1] + ref
   596  	} else {
   597  		full = ref
   598  	}
   599  	if full == "" {
   600  		return ""
   601  	}
   602  	var dst []string
   603  	src := strings.Split(full, "/")
   604  	for _, elem := range src {
   605  		switch elem {
   606  		case ".":
   607  			// drop
   608  		case "..":
   609  			if len(dst) > 0 {
   610  				dst = dst[:len(dst)-1]
   611  			}
   612  		default:
   613  			dst = append(dst, elem)
   614  		}
   615  	}
   616  	if last := src[len(src)-1]; last == "." || last == ".." {
   617  		// Add final slash to the joined path.
   618  		dst = append(dst, "")
   619  	}
   620  	return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/")
   621  }
   622  
   623  // IsAbs returns true if the URL is absolute.
   624  func (u *URL) IsAbs() bool {
   625  	return u.Scheme != ""
   626  }
   627  
   628  // Parse parses a URL in the context of the receiver.  The provided URL
   629  // may be relative or absolute.  Parse returns nil, err on parse
   630  // failure, otherwise its return value is the same as ResolveReference.
   631  func (u *URL) Parse(ref string) (*URL, error) {
   632  	refurl, err := Parse(ref)
   633  	if err != nil {
   634  		return nil, err
   635  	}
   636  	return u.ResolveReference(refurl), nil
   637  }
   638  
   639  // ResolveReference resolves a URI reference to an absolute URI from
   640  // an absolute base URI, per RFC 3986 Section 5.2.  The URI reference
   641  // may be relative or absolute.  ResolveReference always returns a new
   642  // URL instance, even if the returned URL is identical to either the
   643  // base or reference. If ref is an absolute URL, then ResolveReference
   644  // ignores base and returns a copy of ref.
   645  func (u *URL) ResolveReference(ref *URL) *URL {
   646  	url := *ref
   647  	if ref.Scheme == "" {
   648  		url.Scheme = u.Scheme
   649  	}
   650  	if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
   651  		// The "absoluteURI" or "net_path" cases.
   652  		url.Path = resolvePath(ref.Path, "")
   653  		return &url
   654  	}
   655  	if ref.Opaque != "" {
   656  		url.User = nil
   657  		url.Host = ""
   658  		url.Path = ""
   659  		return &url
   660  	}
   661  	if ref.Path == "" {
   662  		if ref.RawQuery == "" {
   663  			url.RawQuery = u.RawQuery
   664  			if ref.Fragment == "" {
   665  				url.Fragment = u.Fragment
   666  			}
   667  		}
   668  	}
   669  	// The "abs_path" or "rel_path" cases.
   670  	url.Host = u.Host
   671  	url.User = u.User
   672  	url.Path = resolvePath(u.Path, ref.Path)
   673  	return &url
   674  }
   675  
   676  // Query parses RawQuery and returns the corresponding values.
   677  func (u *URL) Query() Values {
   678  	v, _ := ParseQuery(u.RawQuery)
   679  	return v
   680  }
   681  
   682  // RequestURI returns the encoded path?query or opaque?query
   683  // string that would be used in an HTTP request for u.
   684  func (u *URL) RequestURI() string {
   685  	result := u.Opaque
   686  	if result == "" {
   687  		result = escape(u.Path, encodePath)
   688  		if result == "" {
   689  			result = "/"
   690  		}
   691  	} else {
   692  		if strings.HasPrefix(result, "//") {
   693  			result = u.Scheme + ":" + result
   694  		}
   695  	}
   696  	if u.RawQuery != "" {
   697  		result += "?" + u.RawQuery
   698  	}
   699  	return result
   700  }