github.com/qiniu/x@v1.11.9/url/urlescape.go (about)

     1  package url
     2  
     3  import (
     4  	"strconv"
     5  )
     6  
     // Encoding identifies the part of a URL that a string belongs to. The
     // escaping functions in this package use it to decide which bytes may
     // appear unescaped.
     type Encoding int

     // Encoding modes. The values start at 1, so the zero value of Encoding
     // matches none of them.
     const (
     	EncodePath           Encoding = iota + 1 // path
     	EncodeUserPassword                       // userinfo (user name and password)
     	EncodeQueryComponent                     // query component
     	EncodeFragment                           // fragment
     )
    15  
    // EscapeError holds the text of a malformed percent-escape sequence.
    type EscapeError string

    // Error implements the error interface. The message is the fixed prefix
    // "invalid URL escape " followed by the offending text in Go-quoted form.
    func (e EscapeError) Error() string {
    	quoted := strconv.Quote(string(e))
    	return "invalid URL escape " + quoted
    }
    21  
    // ishex reports whether c is an ASCII hexadecimal digit: 0-9, a-f or A-F.
    func ishex(c byte) bool {
    	isDigit := c >= '0' && c <= '9'
    	isLower := c >= 'a' && c <= 'f'
    	isUpper := c >= 'A' && c <= 'F'
    	return isDigit || isLower || isUpper
    }
    33  
    // unhex returns the numeric value (0-15) of the hexadecimal digit c.
    // A byte that is not a hexadecimal digit yields 0, so callers that need
    // to tell '0' apart from invalid input must validate first.
    func unhex(c byte) byte {
    	if c >= '0' && c <= '9' {
    		return c - '0'
    	}
    	if c >= 'a' && c <= 'f' {
    		return 10 + (c - 'a')
    	}
    	if c >= 'A' && c <= 'F' {
    		return 10 + (c - 'A')
    	}
    	return 0
    }
    45  
    46  // Return true if the specified character should be escaped when
    47  // appearing in a URL string, according to RFC 3986.
    48  // When 'all' is true the full range of reserved characters are matched.
    49  func shouldEscape(c byte, mode Encoding) bool {
    50  	// §2.3 Unreserved characters (alphanum)
    51  	if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
    52  		return false
    53  	}
    54  
    55  	switch c {
    56  	case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
    57  		return false
    58  
    59  	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
    60  		// Different sections of the URL allow a few of
    61  		// the reserved characters to appear unescaped.
    62  		switch mode {
    63  		case EncodePath: // §3.3
    64  			// The RFC allows : @ & = + $ but saves / ; , for assigning
    65  			// meaning to individual path segments. This package
    66  			// only manipulates the path as a whole, so we allow those
    67  			// last two as well. That leaves only ? to escape.
    68  			return c == '?'
    69  
    70  		case EncodeUserPassword: // §3.2.2
    71  			// The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
    72  			// The parsing of userinfo treats : as special so we must escape that too.
    73  			return c == '@' || c == '/' || c == ':'
    74  
    75  		case EncodeQueryComponent: // §3.4
    76  			// The RFC reserves (so we must escape) everything.
    77  			return true
    78  
    79  		case EncodeFragment: // §4.1
    80  			// The RFC text is silent but the grammar allows
    81  			// everything, so escape nothing.
    82  			return false
    83  		}
    84  	}
    85  
    86  	// Everything else must be escaped.
    87  	return true
    88  }
    89  
    90  // QueryUnescape does the inverse transformation of QueryEscape, converting
    91  // %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
    92  // any % is not followed by two hexadecimal digits.
    93  func QueryUnescape(s string) (string, error) {
    94  	return UnescapeEx(s, EncodeQueryComponent)
    95  }
    96  
    97  func Unescape(s string) (string, error) {
    98  	return UnescapeEx(s, EncodePath)
    99  }
   100  
   101  // UnescapeEx unescapes a string; the mode specifies
   102  // which section of the URL string is being unescaped.
   103  func UnescapeEx(s string, mode Encoding) (string, error) {
   104  	// Count %, check that they're well-formed.
   105  	n := 0
   106  	hasPlus := false
   107  	for i := 0; i < len(s); {
   108  		switch s[i] {
   109  		case '%':
   110  			n++
   111  			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   112  				s = s[i:]
   113  				if len(s) > 3 {
   114  					s = s[0:3]
   115  				}
   116  				return "", EscapeError(s)
   117  			}
   118  			i += 3
   119  		case '+':
   120  			hasPlus = mode == EncodeQueryComponent
   121  			i++
   122  		default:
   123  			i++
   124  		}
   125  	}
   126  
   127  	if n == 0 && !hasPlus {
   128  		return s, nil
   129  	}
   130  
   131  	t := make([]byte, len(s)-2*n)
   132  	j := 0
   133  	for i := 0; i < len(s); {
   134  		switch s[i] {
   135  		case '%':
   136  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   137  			j++
   138  			i += 3
   139  		case '+':
   140  			if mode == EncodeQueryComponent {
   141  				t[j] = ' '
   142  			} else {
   143  				t[j] = '+'
   144  			}
   145  			j++
   146  			i++
   147  		default:
   148  			t[j] = s[i]
   149  			j++
   150  			i++
   151  		}
   152  	}
   153  	return string(t), nil
   154  }
   155  
   156  // QueryEscape escapes the string so it can be safely placed
   157  // inside a URL query.
   158  func QueryEscape(s string) string {
   159  	return EscapeEx(s, EncodeQueryComponent)
   160  }
   161  
   162  func Escape(s string) string {
   163  	return EscapeEx(s, EncodePath)
   164  }
   165  
   166  func EscapeEx(s string, mode Encoding) string {
   167  	spaceCount, hexCount := 0, 0
   168  	for i := 0; i < len(s); i++ {
   169  		c := s[i]
   170  		if shouldEscape(c, mode) {
   171  			if c == ' ' && mode == EncodeQueryComponent {
   172  				spaceCount++
   173  			} else {
   174  				hexCount++
   175  			}
   176  		}
   177  	}
   178  
   179  	if spaceCount == 0 && hexCount == 0 {
   180  		return s
   181  	}
   182  
   183  	t := make([]byte, len(s)+2*hexCount)
   184  	j := 0
   185  	for i := 0; i < len(s); i++ {
   186  		switch c := s[i]; {
   187  		case c == ' ' && mode == EncodeQueryComponent:
   188  			t[j] = '+'
   189  			j++
   190  		case shouldEscape(c, mode):
   191  			t[j] = '%'
   192  			t[j+1] = "0123456789ABCDEF"[c>>4]
   193  			t[j+2] = "0123456789ABCDEF"[c&15]
   194  			j += 3
   195  		default:
   196  			t[j] = s[i]
   197  			j++
   198  		}
   199  	}
   200  	return string(t)
   201  }