github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/net/http/lex.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package http
     6  
     7  import (
     8  	"strings"
     9  	"unicode/utf8"
    10  )
    11  
    12  // This file deals with lexical matters of HTTP
    13  
    14  var isTokenTable = [127]bool{
    15  	'!':  true,
    16  	'#':  true,
    17  	'$':  true,
    18  	'%':  true,
    19  	'&':  true,
    20  	'\'': true,
    21  	'*':  true,
    22  	'+':  true,
    23  	'-':  true,
    24  	'.':  true,
    25  	'0':  true,
    26  	'1':  true,
    27  	'2':  true,
    28  	'3':  true,
    29  	'4':  true,
    30  	'5':  true,
    31  	'6':  true,
    32  	'7':  true,
    33  	'8':  true,
    34  	'9':  true,
    35  	'A':  true,
    36  	'B':  true,
    37  	'C':  true,
    38  	'D':  true,
    39  	'E':  true,
    40  	'F':  true,
    41  	'G':  true,
    42  	'H':  true,
    43  	'I':  true,
    44  	'J':  true,
    45  	'K':  true,
    46  	'L':  true,
    47  	'M':  true,
    48  	'N':  true,
    49  	'O':  true,
    50  	'P':  true,
    51  	'Q':  true,
    52  	'R':  true,
    53  	'S':  true,
    54  	'T':  true,
    55  	'U':  true,
    56  	'W':  true,
    57  	'V':  true,
    58  	'X':  true,
    59  	'Y':  true,
    60  	'Z':  true,
    61  	'^':  true,
    62  	'_':  true,
    63  	'`':  true,
    64  	'a':  true,
    65  	'b':  true,
    66  	'c':  true,
    67  	'd':  true,
    68  	'e':  true,
    69  	'f':  true,
    70  	'g':  true,
    71  	'h':  true,
    72  	'i':  true,
    73  	'j':  true,
    74  	'k':  true,
    75  	'l':  true,
    76  	'm':  true,
    77  	'n':  true,
    78  	'o':  true,
    79  	'p':  true,
    80  	'q':  true,
    81  	'r':  true,
    82  	's':  true,
    83  	't':  true,
    84  	'u':  true,
    85  	'v':  true,
    86  	'w':  true,
    87  	'x':  true,
    88  	'y':  true,
    89  	'z':  true,
    90  	'|':  true,
    91  	'~':  true,
    92  }
    93  
    94  func isToken(r rune) bool {
    95  	i := int(r)
    96  	return i < len(isTokenTable) && isTokenTable[i]
    97  }
    98  
    99  func isNotToken(r rune) bool {
   100  	return !isToken(r)
   101  }
   102  
   103  // headerValuesContainsToken reports whether any string in values
   104  // contains the provided token, ASCII case-insensitively.
   105  func headerValuesContainsToken(values []string, token string) bool {
   106  	for _, v := range values {
   107  		if headerValueContainsToken(v, token) {
   108  			return true
   109  		}
   110  	}
   111  	return false
   112  }
   113  
   114  // isOWS reports whether b is an optional whitespace byte, as defined
   115  // by RFC 7230 section 3.2.3.
   116  func isOWS(b byte) bool { return b == ' ' || b == '\t' }
   117  
   118  // trimOWS returns x with all optional whitespace removes from the
   119  // beginning and end.
   120  func trimOWS(x string) string {
   121  	// TODO: consider using strings.Trim(x, " \t") instead,
   122  	// if and when it's fast enough. See issue 10292.
   123  	// But this ASCII-only code will probably always beat UTF-8
   124  	// aware code.
   125  	for len(x) > 0 && isOWS(x[0]) {
   126  		x = x[1:]
   127  	}
   128  	for len(x) > 0 && isOWS(x[len(x)-1]) {
   129  		x = x[:len(x)-1]
   130  	}
   131  	return x
   132  }
   133  
   134  // headerValueContainsToken reports whether v (assumed to be a
   135  // 0#element, in the ABNF extension described in RFC 7230 section 7)
   136  // contains token amongst its comma-separated tokens, ASCII
   137  // case-insensitively.
   138  func headerValueContainsToken(v string, token string) bool {
   139  	v = trimOWS(v)
   140  	if comma := strings.IndexByte(v, ','); comma != -1 {
   141  		return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token)
   142  	}
   143  	return tokenEqual(v, token)
   144  }
   145  
   146  // lowerASCII returns the ASCII lowercase version of b.
   147  func lowerASCII(b byte) byte {
   148  	if 'A' <= b && b <= 'Z' {
   149  		return b + ('a' - 'A')
   150  	}
   151  	return b
   152  }
   153  
   154  // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
   155  func tokenEqual(t1, t2 string) bool {
   156  	if len(t1) != len(t2) {
   157  		return false
   158  	}
   159  	for i, b := range t1 {
   160  		if b >= utf8.RuneSelf {
   161  			// No UTF-8 or non-ASCII allowed in tokens.
   162  			return false
   163  		}
   164  		if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
   165  			return false
   166  		}
   167  	}
   168  	return true
   169  }
   170  
   171  // isLWS reports whether b is linear white space, according
   172  // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
   173  //      LWS            = [CRLF] 1*( SP | HT )
   174  func isLWS(b byte) bool { return b == ' ' || b == '\t' }
   175  
   176  // isCTL reports whether b is a control byte, according
   177  // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
   178  //      CTL            = <any US-ASCII control character
   179  //                       (octets 0 - 31) and DEL (127)>
   180  func isCTL(b byte) bool {
   181  	const del = 0x7f // a CTL
   182  	return b < ' ' || b == del
   183  }
   184  
   185  func validHeaderName(v string) bool {
   186  	if len(v) == 0 {
   187  		return false
   188  	}
   189  	for _, r := range v {
   190  		if !isToken(r) {
   191  			return false
   192  		}
   193  	}
   194  	return true
   195  }
   196  
   197  func validHostHeader(h string) bool {
   198  	// The latests spec is actually this:
   199  	//
   200  	// http://tools.ietf.org/html/rfc7230#section-5.4
   201  	//     Host = uri-host [ ":" port ]
   202  	//
   203  	// Where uri-host is:
   204  	//     http://tools.ietf.org/html/rfc3986#section-3.2.2
   205  	//
   206  	// But we're going to be much more lenient for now and just
   207  	// search for any byte that's not a valid byte in any of those
   208  	// expressions.
   209  	for i := 0; i < len(h); i++ {
   210  		if !validHostByte[h[i]] {
   211  			return false
   212  		}
   213  	}
   214  	return true
   215  }
   216  
   217  // See the validHostHeader comment.
   218  var validHostByte = [256]bool{
   219  	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
   220  	'8': true, '9': true,
   221  
   222  	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
   223  	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
   224  	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
   225  	'y': true, 'z': true,
   226  
   227  	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
   228  	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
   229  	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
   230  	'Y': true, 'Z': true,
   231  
   232  	'!':  true, // sub-delims
   233  	'$':  true, // sub-delims
   234  	'%':  true, // pct-encoded (and used in IPv6 zones)
   235  	'&':  true, // sub-delims
   236  	'(':  true, // sub-delims
   237  	')':  true, // sub-delims
   238  	'*':  true, // sub-delims
   239  	'+':  true, // sub-delims
   240  	',':  true, // sub-delims
   241  	'-':  true, // unreserved
   242  	'.':  true, // unreserved
   243  	':':  true, // IPv6address + Host expression's optional port
   244  	';':  true, // sub-delims
   245  	'=':  true, // sub-delims
   246  	'[':  true,
   247  	'\'': true, // sub-delims
   248  	']':  true,
   249  	'_':  true, // unreserved
   250  	'~':  true, // unreserved
   251  }
   252  
   253  // validHeaderValue reports whether v is a valid "field-value" according to
   254  // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
   255  //
   256  //        message-header = field-name ":" [ field-value ]
   257  //        field-value    = *( field-content | LWS )
   258  //        field-content  = <the OCTETs making up the field-value
   259  //                         and consisting of either *TEXT or combinations
   260  //                         of token, separators, and quoted-string>
   261  //
   262  // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
   263  //
   264  //        TEXT           = <any OCTET except CTLs,
   265  //                          but including LWS>
   266  //        LWS            = [CRLF] 1*( SP | HT )
   267  //        CTL            = <any US-ASCII control character
   268  //                         (octets 0 - 31) and DEL (127)>
   269  func validHeaderValue(v string) bool {
   270  	for i := 0; i < len(v); i++ {
   271  		b := v[i]
   272  		if isCTL(b) && !isLWS(b) {
   273  			return false
   274  		}
   275  	}
   276  	return true
   277  }