// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The wire protocol for HTTP's "chunked" Transfer-Encoding.
//
// These declarations belong to package internal, which contains HTTP
// internals shared by net/http and net/http/httputil. They rely on the
// standard-library packages bufio, bytes, errors, fmt and io.

// maxLineLength is the longest chunk-size line (including any chunk
// extension and the line terminator) that readChunkLine accepts.
const maxLineLength = 4096 // assumed <= bufio.defaultBufSize

// ErrLineTooLong is returned when a chunk-size line does not fit in the
// reader's buffer or reaches maxLineLength.
var ErrLineTooLong = errors.New("header line too long")

// NewChunkedReader returns a new chunkedReader that translates the data read from r
// out of HTTP "chunked" format before returning it.
// The chunkedReader returns [io.EOF] when the final 0-length chunk is read.
//
// If r is already a *bufio.Reader it is used directly; otherwise it is
// wrapped in a new one.
//
// NewChunkedReader is not needed by normal applications. The http package
// automatically decodes chunking when reading response bodies.
func NewChunkedReader(r io.Reader) io.Reader {
	br, ok := r.(*bufio.Reader)
	if !ok {
		br = bufio.NewReader(r)
	}
	return &chunkedReader{r: br}
}

// chunkedReader decodes a chunked body read from r.
type chunkedReader struct {
	r        *bufio.Reader
	n        uint64 // unread bytes in chunk
	err      error  // sticky error; once set, Read keeps returning it
	buf      [2]byte
	checkEnd bool  // whether need to check for \r\n chunk footer
	excess   int64 // "excessive" chunk overhead, for malicious sender detection
}

// beginChunk reads and parses the next chunk-size line. On success cr.n
// holds the size of the chunk that follows. cr.err is set to io.EOF when
// the chunk size is zero (the terminating chunk), or to a parse/read error.
func (cr *chunkedReader) beginChunk() {
	// chunk-size CRLF
	var line []byte
	line, cr.err = readChunkLine(cr.r)
	if cr.err != nil {
		return
	}
	cr.excess += int64(len(line)) + 2 // header, plus \r\n after the chunk data
	line = trimTrailingWhitespace(line)
	line, cr.err = removeChunkExtension(line)
	if cr.err != nil {
		return
	}
	cr.n, cr.err = parseHexUint(line)
	if cr.err != nil {
		return
	}
	// A sender who sends one byte per chunk will send 5 bytes of overhead
	// for every byte of data. ("1\r\nX\r\n" to send "X".)
	// We want to allow this, since streaming a byte at a time can be legitimate.
	//
	// A sender can use chunk extensions to add arbitrary amounts of additional
	// data per byte read. ("1;very long extension\r\nX\r\n" to send "X".)
	// We don't want to disallow extensions (although we discard them),
	// but we also don't want to allow a sender to reduce the signal/noise ratio
	// arbitrarily.
	//
	// We track the amount of excess overhead read,
	// and produce an error if it grows too large.
	//
	// Currently, we say that we're willing to accept 16 bytes of overhead per chunk,
	// plus twice the amount of real data in the chunk.
	cr.excess -= 16 + (2 * int64(cr.n))
	if cr.excess < 0 {
		// Unused allowance is not carried over to later chunks.
		cr.excess = 0
	}
	if cr.excess > 16*1024 {
		cr.err = errors.New("chunked encoding contains too much non-data")
	}
	if cr.n == 0 {
		cr.err = io.EOF
	}
}

// chunkHeaderAvailable reports whether the bytes already buffered in cr.r
// contain a '\n', i.e. whether a chunk-size line can be read without
// touching the underlying reader.
func (cr *chunkedReader) chunkHeaderAvailable() bool {
	n := cr.r.Buffered()
	if n > 0 {
		peek, _ := cr.r.Peek(n)
		return bytes.IndexByte(peek, '\n') >= 0
	}
	return false
}

// Read implements io.Reader. It copies decoded chunk data into b, parsing
// chunk-size lines and verifying the CRLF after each chunk as it goes.
// Once some data has been produced it returns instead of starting a read
// that might block. An io.EOF from the underlying reader in the middle of
// a chunk or its footer is reported as io.ErrUnexpectedEOF.
func (cr *chunkedReader) Read(b []uint8) (n int, err error) {
	for cr.err == nil {
		if cr.checkEnd {
			if n > 0 && cr.r.Buffered() < 2 {
				// We have some data. Return early (per the io.Reader
				// contract) instead of potentially blocking while
				// reading more.
				break
			}
			if _, cr.err = io.ReadFull(cr.r, cr.buf[:2]); cr.err == nil {
				if string(cr.buf[:]) != "\r\n" {
					cr.err = errors.New("malformed chunked encoding")
					break
				}
			} else {
				if cr.err == io.EOF {
					cr.err = io.ErrUnexpectedEOF
				}
				break
			}
			cr.checkEnd = false
		}
		if cr.n == 0 {
			if n > 0 && !cr.chunkHeaderAvailable() {
				// We've read enough. Don't potentially block
				// reading a new chunk header.
				break
			}
			cr.beginChunk()
			continue
		}
		if len(b) == 0 {
			break
		}
		// Never read past the end of the current chunk.
		rbuf := b
		if uint64(len(rbuf)) > cr.n {
			rbuf = rbuf[:cr.n]
		}
		var n0 int
		n0, cr.err = cr.r.Read(rbuf)
		n += n0
		b = b[n0:]
		cr.n -= uint64(n0)
		// If we're at the end of a chunk, read the next two
		// bytes to verify they are "\r\n".
		if cr.n == 0 && cr.err == nil {
			cr.checkEnd = true
		} else if cr.err == io.EOF {
			cr.err = io.ErrUnexpectedEOF
		}
	}
	return n, cr.err
}

// Read a line of bytes (up to \n) from b.
// Give up if the line exceeds maxLineLength.
// The returned bytes still include the trailing "\r\n" and are owned by
// the bufio.Reader, so they are only valid until the next bufio read.
//
// The line must be terminated by exactly one CRLF: a line that ends in a
// bare LF, or that contains a CR anywhere before the terminator, is
// rejected.
func readChunkLine(b *bufio.Reader) ([]byte, error) {
	p, err := b.ReadSlice('\n')
	if err != nil {
		// We always know when EOF is coming.
		// If the caller asked for a line, there should be a line.
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		} else if err == bufio.ErrBufferFull {
			err = ErrLineTooLong
		}
		return nil, err
	}

	// RFC 9112 lets parsers accept a bare \n as a line ending in header
	// fields, but not in chunked-encoding lines. Accepting it here would
	// let this parser disagree with an intermediary about where a chunk
	// line ends, so verify that the first CR is the one that immediately
	// precedes the final LF.
	if idx := bytes.IndexByte(p, '\r'); idx == -1 {
		return nil, errors.New("chunked line ends with bare LF")
	} else if idx != len(p)-2 {
		return nil, errors.New("invalid CR in chunked line")
	}

	if len(p) >= maxLineLength {
		return nil, ErrLineTooLong
	}
	return p, nil
}

// trimTrailingWhitespace returns b with any trailing ASCII space, tab,
// CR and LF bytes removed.
func trimTrailingWhitespace(b []byte) []byte {
	for len(b) > 0 && isASCIISpace(b[len(b)-1]) {
		b = b[:len(b)-1]
	}
	return b
}

// isASCIISpace reports whether b is a space, tab, LF or CR.
func isASCIISpace(b byte) bool {
	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
}

// semi is the separator between a chunk size and its extensions.
var semi = []byte(";")

// removeChunkExtension removes any chunk-extension from p.
// For example,
//
//	"0" => "0"
//	"0;token" => "0"
//	"0;token=val" => "0"
//	`0;token="quoted string"` => "0"
func removeChunkExtension(p []byte) ([]byte, error) {
	p, _, _ = bytes.Cut(p, semi)
	// TODO: care about exact syntax of chunk extensions? We're
	// ignoring and stripping them anyway. For now just never
	// return an error.
	return p, nil
}

// NewChunkedWriter returns a new chunkedWriter that translates writes into HTTP
// "chunked" format before writing them to w. Closing the returned chunkedWriter
// sends the final 0-length chunk that marks the end of the stream but does
// not send the final CRLF that appears after trailers; trailers and the last
// CRLF must be written separately.
//
// NewChunkedWriter is not needed by normal applications. The http
// package adds chunking automatically if handlers don't set a
// Content-Length header. Using NewChunkedWriter inside a handler
// would result in double chunking or chunking with a Content-Length
// length, both of which are wrong.
func NewChunkedWriter(w io.Writer) io.WriteCloser {
	return &chunkedWriter{w}
}

// Writing to chunkedWriter translates to writing in HTTP chunked Transfer
// Encoding wire format to the underlying Wire chunkedWriter.
type chunkedWriter struct {
	Wire io.Writer
}

// Write the contents of data as one chunk to Wire.
//
// On success n is len(data). If writing the chunk-size line fails, n is 0;
// a short write of the data is reported as io.ErrShortWrite. When Wire is
// a *FlushAfterChunkWriter it is flushed after the chunk.
//
// NOTE: The corresponding chunk-writing procedure in Conn.Write has
// a bug since it does not check for success of [io.WriteString].
func (cw *chunkedWriter) Write(data []byte) (n int, err error) {
	// Don't send 0-length data. It looks like EOF for chunked encoding.
	if len(data) == 0 {
		return 0, nil
	}

	if _, err = fmt.Fprintf(cw.Wire, "%x\r\n", len(data)); err != nil {
		return 0, err
	}
	if n, err = cw.Wire.Write(data); err != nil {
		return
	}
	if n != len(data) {
		err = io.ErrShortWrite
		return
	}
	if _, err = io.WriteString(cw.Wire, "\r\n"); err != nil {
		return
	}
	if bw, ok := cw.Wire.(*FlushAfterChunkWriter); ok {
		err = bw.Flush()
	}
	return
}

// Close writes the terminating zero-length chunk ("0\r\n") to Wire.
// It does not close Wire and does not write trailers or the final CRLF.
func (cw *chunkedWriter) Close() error {
	_, err := io.WriteString(cw.Wire, "0\r\n")
	return err
}

// FlushAfterChunkWriter signals from the caller of [NewChunkedWriter]
// that each chunk should be followed by a flush. It is used by the
// [net/http.Transport] code to keep the buffering behavior for headers and
// trailers, but flush out chunks aggressively in the middle for
// request bodies which may be generated slowly. See Issue 6574.
type FlushAfterChunkWriter struct {
	*bufio.Writer
}

// parseHexUint parses v as an unsigned hexadecimal number without any
// prefix. It returns an error if v is empty, contains a byte that is not
// a hex digit, or has more than 16 digits.
func parseHexUint(v []byte) (n uint64, err error) {
	if len(v) == 0 {
		return 0, errors.New("empty hex number for chunk length")
	}
	for i, b := range v {
		switch {
		case '0' <= b && b <= '9':
			b = b - '0'
		case 'a' <= b && b <= 'f':
			b = b - 'a' + 10
		case 'A' <= b && b <= 'F':
			b = b - 'A' + 10
		default:
			return 0, errors.New("invalid byte in chunk length")
		}
		if i == 16 {
			// A 17th digit would not fit in a uint64.
			return 0, errors.New("http chunk length too large")
		}
		n <<= 4
		n |= uint64(b)
	}
	return
}