github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/net/http/lex.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package http 6 7 import ( 8 "strings" 9 "unicode/utf8" 10 ) 11 12 // This file deals with lexical matters of HTTP 13 14 var isTokenTable = [127]bool{ 15 '!': true, 16 '#': true, 17 '$': true, 18 '%': true, 19 '&': true, 20 '\'': true, 21 '*': true, 22 '+': true, 23 '-': true, 24 '.': true, 25 '0': true, 26 '1': true, 27 '2': true, 28 '3': true, 29 '4': true, 30 '5': true, 31 '6': true, 32 '7': true, 33 '8': true, 34 '9': true, 35 'A': true, 36 'B': true, 37 'C': true, 38 'D': true, 39 'E': true, 40 'F': true, 41 'G': true, 42 'H': true, 43 'I': true, 44 'J': true, 45 'K': true, 46 'L': true, 47 'M': true, 48 'N': true, 49 'O': true, 50 'P': true, 51 'Q': true, 52 'R': true, 53 'S': true, 54 'T': true, 55 'U': true, 56 'W': true, 57 'V': true, 58 'X': true, 59 'Y': true, 60 'Z': true, 61 '^': true, 62 '_': true, 63 '`': true, 64 'a': true, 65 'b': true, 66 'c': true, 67 'd': true, 68 'e': true, 69 'f': true, 70 'g': true, 71 'h': true, 72 'i': true, 73 'j': true, 74 'k': true, 75 'l': true, 76 'm': true, 77 'n': true, 78 'o': true, 79 'p': true, 80 'q': true, 81 'r': true, 82 's': true, 83 't': true, 84 'u': true, 85 'v': true, 86 'w': true, 87 'x': true, 88 'y': true, 89 'z': true, 90 '|': true, 91 '~': true, 92 } 93 94 func isToken(r rune) bool { 95 i := int(r) 96 return i < len(isTokenTable) && isTokenTable[i] 97 } 98 99 func isNotToken(r rune) bool { 100 return !isToken(r) 101 } 102 103 // headerValuesContainsToken reports whether any string in values 104 // contains the provided token, ASCII case-insensitively. 105 func headerValuesContainsToken(values []string, token string) bool { 106 for _, v := range values { 107 if headerValueContainsToken(v, token) { 108 return true 109 } 110 } 111 return false 112 } 113 114 // isOWS reports whether b is an optional whitespace byte, as defined 115 // by RFC 7230 section 3.2.3. 116 func isOWS(b byte) bool { return b == ' ' || b == '\t' } 117 118 // trimOWS returns x with all optional whitespace removes from the 119 // beginning and end. 120 func trimOWS(x string) string { 121 // TODO: consider using strings.Trim(x, " \t") instead, 122 // if and when it's fast enough. See issue 10292. 123 // But this ASCII-only code will probably always beat UTF-8 124 // aware code. 125 for len(x) > 0 && isOWS(x[0]) { 126 x = x[1:] 127 } 128 for len(x) > 0 && isOWS(x[len(x)-1]) { 129 x = x[:len(x)-1] 130 } 131 return x 132 } 133 134 // headerValueContainsToken reports whether v (assumed to be a 135 // 0#element, in the ABNF extension described in RFC 7230 section 7) 136 // contains token amongst its comma-separated tokens, ASCII 137 // case-insensitively. 138 func headerValueContainsToken(v string, token string) bool { 139 v = trimOWS(v) 140 if comma := strings.IndexByte(v, ','); comma != -1 { 141 return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token) 142 } 143 return tokenEqual(v, token) 144 } 145 146 // lowerASCII returns the ASCII lowercase version of b. 147 func lowerASCII(b byte) byte { 148 if 'A' <= b && b <= 'Z' { 149 return b + ('a' - 'A') 150 } 151 return b 152 } 153 154 // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively. 155 func tokenEqual(t1, t2 string) bool { 156 if len(t1) != len(t2) { 157 return false 158 } 159 for i, b := range t1 { 160 if b >= utf8.RuneSelf { 161 // No UTF-8 or non-ASCII allowed in tokens. 162 return false 163 } 164 if lowerASCII(byte(b)) != lowerASCII(t2[i]) { 165 return false 166 } 167 } 168 return true 169 } 170 171 // isLWS reports whether b is linear white space, according 172 // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 173 // LWS = [CRLF] 1*( SP | HT ) 174 func isLWS(b byte) bool { return b == ' ' || b == '\t' } 175 176 // isCTL reports whether b is a control byte, according 177 // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 178 // CTL = <any US-ASCII control character 179 // (octets 0 - 31) and DEL (127)> 180 func isCTL(b byte) bool { 181 const del = 0x7f // a CTL 182 return b < ' ' || b == del 183 } 184 185 func validHeaderName(v string) bool { 186 if len(v) == 0 { 187 return false 188 } 189 for _, r := range v { 190 if !isToken(r) { 191 return false 192 } 193 } 194 return true 195 } 196 197 func validHostHeader(h string) bool { 198 // The latests spec is actually this: 199 // 200 // http://tools.ietf.org/html/rfc7230#section-5.4 201 // Host = uri-host [ ":" port ] 202 // 203 // Where uri-host is: 204 // http://tools.ietf.org/html/rfc3986#section-3.2.2 205 // 206 // But we're going to be much more lenient for now and just 207 // search for any byte that's not a valid byte in any of those 208 // expressions. 209 for i := 0; i < len(h); i++ { 210 if !validHostByte[h[i]] { 211 return false 212 } 213 } 214 return true 215 } 216 217 // See the validHostHeader comment. 218 var validHostByte = [256]bool{ 219 '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true, 220 '8': true, '9': true, 221 222 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true, 223 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true, 224 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 225 'y': true, 'z': true, 226 227 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true, 228 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true, 229 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 230 'Y': true, 'Z': true, 231 232 '!': true, // sub-delims 233 '$': true, // sub-delims 234 '%': true, // pct-encoded (and used in IPv6 zones) 235 '&': true, // sub-delims 236 '(': true, // sub-delims 237 ')': true, // sub-delims 238 '*': true, // sub-delims 239 '+': true, // sub-delims 240 ',': true, // sub-delims 241 '-': true, // unreserved 242 '.': true, // unreserved 243 ':': true, // IPv6address + Host expression's optional port 244 ';': true, // sub-delims 245 '=': true, // sub-delims 246 '[': true, 247 '\'': true, // sub-delims 248 ']': true, 249 '_': true, // unreserved 250 '~': true, // unreserved 251 } 252 253 // validHeaderValue reports whether v is a valid "field-value" according to 254 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 : 255 // 256 // message-header = field-name ":" [ field-value ] 257 // field-value = *( field-content | LWS ) 258 // field-content = <the OCTETs making up the field-value 259 // and consisting of either *TEXT or combinations 260 // of token, separators, and quoted-string> 261 // 262 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 : 263 // 264 // TEXT = <any OCTET except CTLs, 265 // but including LWS> 266 // LWS = [CRLF] 1*( SP | HT ) 267 // CTL = <any US-ASCII control character 268 // (octets 0 - 31) and DEL (127)> 269 func validHeaderValue(v string) bool { 270 for i := 0; i < len(v); i++ { 271 b := v[i] 272 if isCTL(b) && !isLWS(b) { 273 return false 274 } 275 } 276 return true 277 }