gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/net/http/httpguts/httplex.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package httpguts
     6  
     7  import (
     8  	"net"
     9  	"strings"
    10  	"unicode/utf8"
    11  
    12  	"gitee.com/ks-custle/core-gm/net/idna"
    13  )
    14  
    15  var isTokenTable = [127]bool{
    16  	'!':  true,
    17  	'#':  true,
    18  	'$':  true,
    19  	'%':  true,
    20  	'&':  true,
    21  	'\'': true,
    22  	'*':  true,
    23  	'+':  true,
    24  	'-':  true,
    25  	'.':  true,
    26  	'0':  true,
    27  	'1':  true,
    28  	'2':  true,
    29  	'3':  true,
    30  	'4':  true,
    31  	'5':  true,
    32  	'6':  true,
    33  	'7':  true,
    34  	'8':  true,
    35  	'9':  true,
    36  	'A':  true,
    37  	'B':  true,
    38  	'C':  true,
    39  	'D':  true,
    40  	'E':  true,
    41  	'F':  true,
    42  	'G':  true,
    43  	'H':  true,
    44  	'I':  true,
    45  	'J':  true,
    46  	'K':  true,
    47  	'L':  true,
    48  	'M':  true,
    49  	'N':  true,
    50  	'O':  true,
    51  	'P':  true,
    52  	'Q':  true,
    53  	'R':  true,
    54  	'S':  true,
    55  	'T':  true,
    56  	'U':  true,
    57  	'W':  true,
    58  	'V':  true,
    59  	'X':  true,
    60  	'Y':  true,
    61  	'Z':  true,
    62  	'^':  true,
    63  	'_':  true,
    64  	'`':  true,
    65  	'a':  true,
    66  	'b':  true,
    67  	'c':  true,
    68  	'd':  true,
    69  	'e':  true,
    70  	'f':  true,
    71  	'g':  true,
    72  	'h':  true,
    73  	'i':  true,
    74  	'j':  true,
    75  	'k':  true,
    76  	'l':  true,
    77  	'm':  true,
    78  	'n':  true,
    79  	'o':  true,
    80  	'p':  true,
    81  	'q':  true,
    82  	'r':  true,
    83  	's':  true,
    84  	't':  true,
    85  	'u':  true,
    86  	'v':  true,
    87  	'w':  true,
    88  	'x':  true,
    89  	'y':  true,
    90  	'z':  true,
    91  	'|':  true,
    92  	'~':  true,
    93  }
    94  
    95  func IsTokenRune(r rune) bool {
    96  	i := int(r)
    97  	return i < len(isTokenTable) && isTokenTable[i]
    98  }
    99  
// isNotToken reports whether r is not an HTTP token character; it is the
// exact negation of IsTokenRune.
func isNotToken(r rune) bool {
	return !IsTokenRune(r)
}
   103  
   104  // HeaderValuesContainsToken reports whether any string in values
   105  // contains the provided token, ASCII case-insensitively.
   106  func HeaderValuesContainsToken(values []string, token string) bool {
   107  	for _, v := range values {
   108  		if headerValueContainsToken(v, token) {
   109  			return true
   110  		}
   111  	}
   112  	return false
   113  }
   114  
   115  // isOWS reports whether b is an optional whitespace byte, as defined
   116  // by RFC 7230 section 3.2.3.
   117  func isOWS(b byte) bool { return b == ' ' || b == '\t' }
   118  
   119  // trimOWS returns x with all optional whitespace removes from the
   120  // beginning and end.
   121  func trimOWS(x string) string {
   122  	// TODO: consider using strings.Trim(x, " \t") instead,
   123  	// if and when it's fast enough. See issue 10292.
   124  	// But this ASCII-only code will probably always beat UTF-8
   125  	// aware code.
   126  	for len(x) > 0 && isOWS(x[0]) {
   127  		x = x[1:]
   128  	}
   129  	for len(x) > 0 && isOWS(x[len(x)-1]) {
   130  		x = x[:len(x)-1]
   131  	}
   132  	return x
   133  }
   134  
   135  // headerValueContainsToken reports whether v (assumed to be a
   136  // 0#element, in the ABNF extension described in RFC 7230 section 7)
   137  // contains token amongst its comma-separated tokens, ASCII
   138  // case-insensitively.
   139  func headerValueContainsToken(v string, token string) bool {
   140  	for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {
   141  		if tokenEqual(trimOWS(v[:comma]), token) {
   142  			return true
   143  		}
   144  		v = v[comma+1:]
   145  	}
   146  	return tokenEqual(trimOWS(v), token)
   147  }
   148  
   149  // lowerASCII returns the ASCII lowercase version of b.
   150  func lowerASCII(b byte) byte {
   151  	if 'A' <= b && b <= 'Z' {
   152  		return b + ('a' - 'A')
   153  	}
   154  	return b
   155  }
   156  
   157  // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
   158  func tokenEqual(t1, t2 string) bool {
   159  	if len(t1) != len(t2) {
   160  		return false
   161  	}
   162  	for i, b := range t1 {
   163  		if b >= utf8.RuneSelf {
   164  			// No UTF-8 or non-ASCII allowed in tokens.
   165  			return false
   166  		}
   167  		if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
   168  			return false
   169  		}
   170  	}
   171  	return true
   172  }
   173  
   174  // isLWS reports whether b is linear white space, according
   175  // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
   176  //
   177  //	LWS            = [CRLF] 1*( SP | HT )
   178  func isLWS(b byte) bool { return b == ' ' || b == '\t' }
   179  
   180  // isCTL reports whether b is a control byte, according
   181  // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
   182  //
   183  //	CTL            = <any US-ASCII control character
   184  //	                 (octets 0 - 31) and DEL (127)>
   185  func isCTL(b byte) bool {
   186  	const del = 0x7f // a CTL
   187  	return b < ' ' || b == del
   188  }
   189  
   190  // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
   191  // HTTP/2 imposes the additional restriction that uppercase ASCII
   192  // letters are not allowed.
   193  //
   194  //	RFC 7230 says:
   195  //	 header-field   = field-name ":" OWS field-value OWS
   196  //	 field-name     = token
   197  //	 token          = 1*tchar
   198  //	 tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
   199  //	         "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
   200  func ValidHeaderFieldName(v string) bool {
   201  	if len(v) == 0 {
   202  		return false
   203  	}
   204  	for _, r := range v {
   205  		if !IsTokenRune(r) {
   206  			return false
   207  		}
   208  	}
   209  	return true
   210  }
   211  
   212  // ValidHostHeader reports whether h is a valid host header.
   213  func ValidHostHeader(h string) bool {
   214  	// The latest spec is actually this:
   215  	//
   216  	// http://tools.ietf.org/html/rfc7230#section-5.4
   217  	//     Host = uri-host [ ":" port ]
   218  	//
   219  	// Where uri-host is:
   220  	//     http://tools.ietf.org/html/rfc3986#section-3.2.2
   221  	//
   222  	// But we're going to be much more lenient for now and just
   223  	// search for any byte that's not a valid byte in any of those
   224  	// expressions.
   225  	for i := 0; i < len(h); i++ {
   226  		if !validHostByte[h[i]] {
   227  			return false
   228  		}
   229  	}
   230  	return true
   231  }
   232  
   233  // See the validHostHeader comment.
   234  var validHostByte = [256]bool{
   235  	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
   236  	'8': true, '9': true,
   237  
   238  	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
   239  	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
   240  	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
   241  	'y': true, 'z': true,
   242  
   243  	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
   244  	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
   245  	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
   246  	'Y': true, 'Z': true,
   247  
   248  	'!':  true, // sub-delims
   249  	'$':  true, // sub-delims
   250  	'%':  true, // pct-encoded (and used in IPv6 zones)
   251  	'&':  true, // sub-delims
   252  	'(':  true, // sub-delims
   253  	')':  true, // sub-delims
   254  	'*':  true, // sub-delims
   255  	'+':  true, // sub-delims
   256  	',':  true, // sub-delims
   257  	'-':  true, // unreserved
   258  	'.':  true, // unreserved
   259  	':':  true, // IPv6address + Host expression's optional port
   260  	';':  true, // sub-delims
   261  	'=':  true, // sub-delims
   262  	'[':  true,
   263  	'\'': true, // sub-delims
   264  	']':  true,
   265  	'_':  true, // unreserved
   266  	'~':  true, // unreserved
   267  }
   268  
   269  // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
   270  // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
   271  //
   272  //	message-header = field-name ":" [ field-value ]
   273  //	field-value    = *( field-content | LWS )
   274  //	field-content  = <the OCTETs making up the field-value
   275  //	                 and consisting of either *TEXT or combinations
   276  //	                 of token, separators, and quoted-string>
   277  //
   278  // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
   279  //
   280  //	TEXT           = <any OCTET except CTLs,
   281  //	                  but including LWS>
   282  //	LWS            = [CRLF] 1*( SP | HT )
   283  //	CTL            = <any US-ASCII control character
   284  //	                 (octets 0 - 31) and DEL (127)>
   285  //
   286  // RFC 7230 says:
   287  //
   288  //	field-value    = *( field-content / obs-fold )
   289  //	obj-fold       =  N/A to http2, and deprecated
   290  //	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
   291  //	field-vchar    = VCHAR / obs-text
   292  //	obs-text       = %x80-FF
   293  //	VCHAR          = "any visible [USASCII] character"
   294  //
   295  // http2 further says: "Similarly, HTTP/2 allows header field values
   296  // that are not valid. While most of the values that can be encoded
   297  // will not alter header field parsing, carriage return (CR, ASCII
   298  // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
   299  // 0x0) might be exploited by an attacker if they are translated
   300  // verbatim. Any request or response that contains a character not
   301  // permitted in a header field value MUST be treated as malformed
   302  // (Section 8.1.2.6). Valid characters are defined by the
   303  // field-content ABNF rule in Section 3.2 of [RFC7230]."
   304  //
   305  // This function does not (yet?) properly handle the rejection of
   306  // strings that begin or end with SP or HTAB.
   307  func ValidHeaderFieldValue(v string) bool {
   308  	for i := 0; i < len(v); i++ {
   309  		b := v[i]
   310  		if isCTL(b) && !isLWS(b) {
   311  			return false
   312  		}
   313  	}
   314  	return true
   315  }
   316  
   317  func isASCII(s string) bool {
   318  	for i := 0; i < len(s); i++ {
   319  		if s[i] >= utf8.RuneSelf {
   320  			return false
   321  		}
   322  	}
   323  	return true
   324  }
   325  
   326  // PunycodeHostPort returns the IDNA Punycode version
   327  // of the provided "host" or "host:port" string.
   328  func PunycodeHostPort(v string) (string, error) {
   329  	if isASCII(v) {
   330  		return v, nil
   331  	}
   332  
   333  	host, port, err := net.SplitHostPort(v)
   334  	if err != nil {
   335  		// The input 'v' argument was just a "host" argument,
   336  		// without a port. This error should not be returned
   337  		// to the caller.
   338  		host = v
   339  		port = ""
   340  	}
   341  	host, err = idna.ToASCII(host)
   342  	if err != nil {
   343  		// Non-UTF-8? Not representable in Punycode, in any
   344  		// case.
   345  		return "", err
   346  	}
   347  	if port == "" {
   348  		return host, nil
   349  	}
   350  	return net.JoinHostPort(host, port), nil
   351  }