// Encoding identifies the section of a URL that a string belongs to.
// The section decides which reserved characters may appear unescaped.
type Encoding int

// URL sections understood by EscapeEx and UnescapeEx.
const (
	// EncodePath selects the rules for the path section (RFC 3986 §3.3).
	EncodePath Encoding = 1 + iota
	// EncodeUserPassword selects the rules for the userinfo section (§3.2.2).
	EncodeUserPassword
	// EncodeQueryComponent selects the rules for a query key or value (§3.4).
	// It is the only mode in which ' ' and '+' are interchanged.
	EncodeQueryComponent
	// EncodeFragment selects the rules for the fragment section (§4.1).
	EncodeFragment
)

// EscapeError reports a malformed percent-escape. Its value is the offending
// text: the '%' and at most the two bytes that follow it.
type EscapeError string

// Error implements the error interface.
func (e EscapeError) Error() string {
	return "invalid URL escape " + strconv.Quote(string(e))
}

// ishex reports whether c is an ASCII hexadecimal digit (either case).
func ishex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}

// unhex returns the numeric value of the hexadecimal digit c.
// It returns 0 when c is not a hexadecimal digit, so callers must
// validate with ishex first.
func unhex(c byte) byte {
	switch {
	case '0' <= c && c <= '9':
		return c - '0'
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10
	}
	return 0
}

// shouldEscape reports whether the byte c must be percent-escaped when it
// appears in the URL section selected by mode, according to RFC 3986.
// Unreserved characters are never escaped; reserved characters are escaped
// depending on mode; every other byte is always escaped.
func shouldEscape(c byte, mode Encoding) bool {
	// §2.3 Unreserved characters (alphanum)
	if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
		return false
	}

	switch c {
	case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
		return false

	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
		// Different sections of the URL allow a few of
		// the reserved characters to appear unescaped.
		switch mode {
		case EncodePath: // §3.3
			// The RFC allows : @ & = + $ but saves / ; , for assigning
			// meaning to individual path segments. This package
			// only manipulates the path as a whole, so we allow those
			// last two as well. That leaves only ? to escape.
			return c == '?'

		case EncodeUserPassword: // §3.2.2
			// The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
			// The parsing of userinfo treats : as special so we must escape that too.
			return c == '@' || c == '/' || c == ':'

		case EncodeQueryComponent: // §3.4
			// The RFC reserves (so we must escape) everything.
			return true

		case EncodeFragment: // §4.1
			// The RFC text is silent but the grammar allows
			// everything, so escape nothing.
			return false
		}
		// An unrecognized mode falls through and is escaped below.
	}

	// Everything else must be escaped.
	return true
}

// QueryUnescape does the inverse transformation of QueryEscape, converting
// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
// any % is not followed by two hexadecimal digits.
func QueryUnescape(s string) (string, error) {
	return UnescapeEx(s, EncodeQueryComponent)
}

// Unescape does the inverse transformation of Escape, converting %AB into
// the byte 0xAB. Unlike QueryUnescape it leaves '+' untouched. It returns an
// error if any % is not followed by two hexadecimal digits.
func Unescape(s string) (string, error) {
	return UnescapeEx(s, EncodePath)
}

// UnescapeEx unescapes a string; the mode specifies
// which section of the URL string is being unescaped.
//
// Every %XY sequence is replaced by the byte 0xXY. '+' is converted to a
// space only when mode is EncodeQueryComponent. On a malformed escape it
// returns an empty string and an EscapeError holding the offending text.
func UnescapeEx(s string, mode Encoding) (string, error) {
	// Count %, check that they're well-formed.
	n := 0
	hasPlus := false
	for i := 0; i < len(s); {
		switch s[i] {
		case '%':
			n++
			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
				// Report the '%' plus at most the two bytes after it.
				s = s[i:]
				if len(s) > 3 {
					s = s[:3]
				}
				return "", EscapeError(s)
			}
			i += 3
		case '+':
			hasPlus = mode == EncodeQueryComponent
			i++
		default:
			i++
		}
	}

	// Nothing to rewrite: hand back the input without allocating.
	if n == 0 && !hasPlus {
		return s, nil
	}

	// Each escape shrinks three input bytes to one output byte.
	t := make([]byte, len(s)-2*n)
	j := 0
	for i := 0; i < len(s); {
		switch s[i] {
		case '%':
			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
			j++
			i += 3
		case '+':
			if mode == EncodeQueryComponent {
				t[j] = ' '
			} else {
				t[j] = '+'
			}
			j++
			i++
		default:
			t[j] = s[i]
			j++
			i++
		}
	}
	return string(t), nil
}

// QueryEscape escapes the string so it can be safely placed
// inside a URL query.
func QueryEscape(s string) string {
	return EscapeEx(s, EncodeQueryComponent)
}

// Escape escapes the string so it can be safely placed inside a URL path.
// Of the reserved characters only '?' is escaped; '/' is kept as is.
func Escape(s string) string {
	return EscapeEx(s, EncodePath)
}

// EscapeEx escapes a string; the mode specifies which section of the URL
// the result is intended for.
//
// Bytes for which shouldEscape reports true are written as %XY with
// upper-case hexadecimal digits, except that a space becomes '+' when mode
// is EncodeQueryComponent. The input is returned unchanged when nothing
// needs escaping.
func EscapeEx(s string, mode Encoding) string {
	const upperhex = "0123456789ABCDEF"

	// First pass: size the output exactly.
	spaceCount, hexCount := 0, 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		if shouldEscape(c, mode) {
			if c == ' ' && mode == EncodeQueryComponent {
				spaceCount++
			} else {
				hexCount++
			}
		}
	}

	if spaceCount == 0 && hexCount == 0 {
		return s
	}

	// A space-to-'+' swap keeps the length; each %XY adds two bytes.
	t := make([]byte, len(s)+2*hexCount)
	j := 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == ' ' && mode == EncodeQueryComponent:
			t[j] = '+'
			j++
		case shouldEscape(c, mode):
			t[j] = '%'
			t[j+1] = upperhex[c>>4]
			t[j+2] = upperhex[c&15]
			j += 3
		default:
			t[j] = c
			j++
		}
	}
	return string(t)
}