// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// isTokenTable marks, by code point, the ASCII characters accepted as
// token characters: digits, letters and the punctuation
// !#$%&'*+-.^_`|~. Every index not listed in the literal stays false.
var isTokenTable = [127]bool{
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true,
	'*': true, '+': true, '-': true, '.': true,

	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	'^': true, '_': true, '`': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	'|': true, '~': true,
}

// IsTokenRune reports whether r is a token character, i.e. whether
// isTokenTable has its entry set.
//
// Any rune that falls outside the table, including a negative value, is
// reported as not being a token character.
func IsTokenRune(r rune) bool {
	// rune is a signed type: without the lower-bound check a negative value
	// would satisfy the length comparison and then panic when used as an
	// array index.
	return r >= 0 && int(r) < len(isTokenTable) && isTokenTable[r]
}

// isNotToken is the negation of IsTokenRune.
func isNotToken(r rune) bool {
	return !IsTokenRune(r)
}

// HeaderValuesContainsToken reports whether any string in values
// contains the provided token, ASCII case-insensitively.
106 func HeaderValuesContainsToken(values []string, token string) bool { 107 for _, v := range values { 108 if headerValueContainsToken(v, token) { 109 return true 110 } 111 } 112 return false 113 } 114 115 // isOWS reports whether b is an optional whitespace byte, as defined 116 // by RFC 7230 section 3.2.3. 117 func isOWS(b byte) bool { return b == ' ' || b == '\t' } 118 119 // trimOWS returns x with all optional whitespace removes from the 120 // beginning and end. 121 func trimOWS(x string) string { 122 // TODO: consider using strings.Trim(x, " \t") instead, 123 // if and when it's fast enough. See issue 10292. 124 // But this ASCII-only code will probably always beat UTF-8 125 // aware code. 126 for len(x) > 0 && isOWS(x[0]) { 127 x = x[1:] 128 } 129 for len(x) > 0 && isOWS(x[len(x)-1]) { 130 x = x[:len(x)-1] 131 } 132 return x 133 } 134 135 // headerValueContainsToken reports whether v (assumed to be a 136 // 0#element, in the ABNF extension described in RFC 7230 section 7) 137 // contains token amongst its comma-separated tokens, ASCII 138 // case-insensitively. 139 func headerValueContainsToken(v string, token string) bool { 140 for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') { 141 if tokenEqual(trimOWS(v[:comma]), token) { 142 return true 143 } 144 v = v[comma+1:] 145 } 146 return tokenEqual(trimOWS(v), token) 147 } 148 149 // lowerASCII returns the ASCII lowercase version of b. 150 func lowerASCII(b byte) byte { 151 if 'A' <= b && b <= 'Z' { 152 return b + ('a' - 'A') 153 } 154 return b 155 } 156 157 // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively. 158 func tokenEqual(t1, t2 string) bool { 159 if len(t1) != len(t2) { 160 return false 161 } 162 for i, b := range t1 { 163 if b >= utf8.RuneSelf { 164 // No UTF-8 or non-ASCII allowed in tokens. 
165 return false 166 } 167 if lowerASCII(byte(b)) != lowerASCII(t2[i]) { 168 return false 169 } 170 } 171 return true 172 } 173 174 // isLWS reports whether b is linear white space, according 175 // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 176 // 177 // LWS = [CRLF] 1*( SP | HT ) 178 func isLWS(b byte) bool { return b == ' ' || b == '\t' } 179 180 // isCTL reports whether b is a control byte, according 181 // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 182 // 183 // CTL = <any US-ASCII control character 184 // (octets 0 - 31) and DEL (127)> 185 func isCTL(b byte) bool { 186 const del = 0x7f // a CTL 187 return b < ' ' || b == del 188 } 189 190 // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name. 191 // HTTP/2 imposes the additional restriction that uppercase ASCII 192 // letters are not allowed. 193 // 194 // RFC 7230 says: 195 // header-field = field-name ":" OWS field-value OWS 196 // field-name = token 197 // token = 1*tchar 198 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / 199 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 200 func ValidHeaderFieldName(v string) bool { 201 if len(v) == 0 { 202 return false 203 } 204 for _, r := range v { 205 if !IsTokenRune(r) { 206 return false 207 } 208 } 209 return true 210 } 211 212 // ValidHostHeader reports whether h is a valid host header. 213 func ValidHostHeader(h string) bool { 214 // The latest spec is actually this: 215 // 216 // http://tools.ietf.org/html/rfc7230#section-5.4 217 // Host = uri-host [ ":" port ] 218 // 219 // Where uri-host is: 220 // http://tools.ietf.org/html/rfc3986#section-3.2.2 221 // 222 // But we're going to be much more lenient for now and just 223 // search for any byte that's not a valid byte in any of those 224 // expressions. 225 for i := 0; i < len(h); i++ { 226 if !validHostByte[h[i]] { 227 return false 228 } 229 } 230 return true 231 } 232 233 // See the validHostHeader comment. 
234 var validHostByte = [256]bool{ 235 '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true, 236 '8': true, '9': true, 237 238 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true, 239 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true, 240 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 241 'y': true, 'z': true, 242 243 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true, 244 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true, 245 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 246 'Y': true, 'Z': true, 247 248 '!': true, // sub-delims 249 '$': true, // sub-delims 250 '%': true, // pct-encoded (and used in IPv6 zones) 251 '&': true, // sub-delims 252 '(': true, // sub-delims 253 ')': true, // sub-delims 254 '*': true, // sub-delims 255 '+': true, // sub-delims 256 ',': true, // sub-delims 257 '-': true, // unreserved 258 '.': true, // unreserved 259 ':': true, // IPv6address + Host expression's optional port 260 ';': true, // sub-delims 261 '=': true, // sub-delims 262 '[': true, 263 '\'': true, // sub-delims 264 ']': true, 265 '_': true, // unreserved 266 '~': true, // unreserved 267 } 268 269 // ValidHeaderFieldValue reports whether v is a valid "field-value" according to 270 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 : 271 // 272 // message-header = field-name ":" [ field-value ] 273 // field-value = *( field-content | LWS ) 274 // field-content = <the OCTETs making up the field-value 275 // and consisting of either *TEXT or combinations 276 // of token, separators, and quoted-string> 277 // 278 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 : 279 // 280 // TEXT = <any OCTET except CTLs, 281 // but including LWS> 282 // LWS = [CRLF] 1*( SP | HT ) 283 // CTL = <any US-ASCII 
control character 284 // (octets 0 - 31) and DEL (127)> 285 // 286 // RFC 7230 says: 287 // 288 // field-value = *( field-content / obs-fold ) 289 // obj-fold = N/A to http2, and deprecated 290 // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] 291 // field-vchar = VCHAR / obs-text 292 // obs-text = %x80-FF 293 // VCHAR = "any visible [USASCII] character" 294 // 295 // http2 further says: "Similarly, HTTP/2 allows header field values 296 // that are not valid. While most of the values that can be encoded 297 // will not alter header field parsing, carriage return (CR, ASCII 298 // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII 299 // 0x0) might be exploited by an attacker if they are translated 300 // verbatim. Any request or response that contains a character not 301 // permitted in a header field value MUST be treated as malformed 302 // (Section 8.1.2.6). Valid characters are defined by the 303 // field-content ABNF rule in Section 3.2 of [RFC7230]." 304 // 305 // This function does not (yet?) properly handle the rejection of 306 // strings that begin or end with SP or HTAB. 307 func ValidHeaderFieldValue(v string) bool { 308 for i := 0; i < len(v); i++ { 309 b := v[i] 310 if isCTL(b) && !isLWS(b) { 311 return false 312 } 313 } 314 return true 315 } 316 317 func isASCII(s string) bool { 318 for i := 0; i < len(s); i++ { 319 if s[i] >= utf8.RuneSelf { 320 return false 321 } 322 } 323 return true 324 } 325 326 // PunycodeHostPort returns the IDNA Punycode version 327 // of the provided "host" or "host:port" string. 328 func PunycodeHostPort(v string) (string, error) { 329 if isASCII(v) { 330 return v, nil 331 } 332 333 host, port, err := net.SplitHostPort(v) 334 if err != nil { 335 // The input 'v' argument was just a "host" argument, 336 // without a port. This error should not be returned 337 // to the caller. 338 host = v 339 port = "" 340 } 341 host, err = idna.ToASCII(host) 342 if err != nil { 343 // Non-UTF-8? 
Not representable in Punycode, in any 344 // case. 345 return "", err 346 } 347 if port == "" { 348 return host, nil 349 } 350 return net.JoinHostPort(host, port), nil 351 }