github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/mime/multipart/multipart.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 // 5 6 /* 7 Package multipart implements MIME multipart parsing, as defined in RFC 8 2046. 9 10 The implementation is sufficient for HTTP (RFC 2388) and the multipart 11 bodies generated by popular browsers. 12 13 # Limits 14 15 To protect against malicious inputs, this package sets limits on the size 16 of the MIME data it processes. 17 18 [Reader.NextPart] and [Reader.NextRawPart] limit the number of headers in a 19 part to 10000 and [Reader.ReadForm] limits the total number of headers in all 20 FileHeaders to 10000. 21 These limits may be adjusted with the GODEBUG=multipartmaxheaders=<values> 22 setting. 23 24 Reader.ReadForm further limits the number of parts in a form to 1000. 25 This limit may be adjusted with the GODEBUG=multipartmaxparts=<value> 26 setting. 27 */ 28 package multipart 29 30 import ( 31 "bufio" 32 "bytes" 33 "fmt" 34 "internal/godebug" 35 "io" 36 "mime" 37 "mime/quotedprintable" 38 "net/textproto" 39 "path/filepath" 40 "strconv" 41 "strings" 42 ) 43 44 var emptyParams = make(map[string]string) 45 46 // This constant needs to be at least 76 for this package to work correctly. 47 // This is because \r\n--separator_of_len_70- would fill the buffer and it 48 // wouldn't be safe to consume a single byte from it. 49 const peekBufferSize = 4096 50 51 // A Part represents a single part in a multipart body. 52 type Part struct { 53 // The headers of the body, if any, with the keys canonicalized 54 // in the same fashion that the Go http.Request headers are. 55 // For example, "foo-bar" changes case to "Foo-Bar" 56 Header textproto.MIMEHeader 57 58 mr *Reader 59 60 disposition string 61 dispositionParams map[string]string 62 63 // r is either a reader directly reading from mr, or it's a 64 // wrapper around such a reader, decoding the 65 // Content-Transfer-Encoding 66 r io.Reader 67 68 n int // known data bytes waiting in mr.bufReader 69 total int64 // total data bytes read already 70 err error // error to return when n == 0 71 readErr error // read error observed from mr.bufReader 72 } 73 74 // FormName returns the name parameter if p has a Content-Disposition 75 // of type "form-data". Otherwise it returns the empty string. 76 func (p *Part) FormName() string { 77 // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF 78 // of Content-Disposition value format. 79 if p.dispositionParams == nil { 80 p.parseContentDisposition() 81 } 82 if p.disposition != "form-data" { 83 return "" 84 } 85 return p.dispositionParams["name"] 86 } 87 88 // FileName returns the filename parameter of the [Part]'s Content-Disposition 89 // header. If not empty, the filename is passed through filepath.Base (which is 90 // platform dependent) before being returned. 91 func (p *Part) FileName() string { 92 if p.dispositionParams == nil { 93 p.parseContentDisposition() 94 } 95 filename := p.dispositionParams["filename"] 96 if filename == "" { 97 return "" 98 } 99 // RFC 7578, Section 4.2 requires that if a filename is provided, the 100 // directory path information must not be used. 101 return filepath.Base(filename) 102 } 103 104 func (p *Part) parseContentDisposition() { 105 v := p.Header.Get("Content-Disposition") 106 var err error 107 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 108 if err != nil { 109 p.dispositionParams = emptyParams 110 } 111 } 112 113 // NewReader creates a new multipart [Reader] reading from r using the 114 // given MIME boundary. 115 // 116 // The boundary is usually obtained from the "boundary" parameter of 117 // the message's "Content-Type" header. Use [mime.ParseMediaType] to 118 // parse such headers. 119 func NewReader(r io.Reader, boundary string) *Reader { 120 b := []byte("\r\n--" + boundary + "--") 121 return &Reader{ 122 bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), 123 nl: b[:2], 124 nlDashBoundary: b[:len(b)-2], 125 dashBoundaryDash: b[2:], 126 dashBoundary: b[2 : len(b)-2], 127 } 128 } 129 130 // stickyErrorReader is an io.Reader which never calls Read on its 131 // underlying Reader once an error has been seen. (the io.Reader 132 // interface's contract promises nothing about the return values of 133 // Read calls after an error, yet this package does do multiple Reads 134 // after error) 135 type stickyErrorReader struct { 136 r io.Reader 137 err error 138 } 139 140 func (r *stickyErrorReader) Read(p []byte) (n int, _ error) { 141 if r.err != nil { 142 return 0, r.err 143 } 144 n, r.err = r.r.Read(p) 145 return n, r.err 146 } 147 148 func newPart(mr *Reader, rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { 149 bp := &Part{ 150 Header: make(map[string][]string), 151 mr: mr, 152 } 153 if err := bp.populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders); err != nil { 154 return nil, err 155 } 156 bp.r = partReader{bp} 157 158 // rawPart is used to switch between Part.NextPart and Part.NextRawPart. 159 if !rawPart { 160 const cte = "Content-Transfer-Encoding" 161 if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") { 162 bp.Header.Del(cte) 163 bp.r = quotedprintable.NewReader(bp.r) 164 } 165 } 166 return bp, nil 167 } 168 169 func (p *Part) populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders int64) error { 170 r := textproto.NewReader(p.mr.bufReader) 171 header, err := readMIMEHeader(r, maxMIMEHeaderSize, maxMIMEHeaders) 172 if err == nil { 173 p.Header = header 174 } 175 // TODO: Add a distinguishable error to net/textproto. 176 if err != nil && err.Error() == "message too large" { 177 err = ErrMessageTooLarge 178 } 179 return err 180 } 181 182 // Read reads the body of a part, after its headers and before the 183 // next part (if any) begins. 184 func (p *Part) Read(d []byte) (n int, err error) { 185 return p.r.Read(d) 186 } 187 188 // partReader implements io.Reader by reading raw bytes directly from the 189 // wrapped *Part, without doing any Transfer-Encoding decoding. 190 type partReader struct { 191 p *Part 192 } 193 194 func (pr partReader) Read(d []byte) (int, error) { 195 p := pr.p 196 br := p.mr.bufReader 197 198 // Read into buffer until we identify some data to return, 199 // or we find a reason to stop (boundary or read error). 200 for p.n == 0 && p.err == nil { 201 peek, _ := br.Peek(br.Buffered()) 202 p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) 203 if p.n == 0 && p.err == nil { 204 // Force buffered I/O to read more into buffer. 205 _, p.readErr = br.Peek(len(peek) + 1) 206 if p.readErr == io.EOF { 207 p.readErr = io.ErrUnexpectedEOF 208 } 209 } 210 } 211 212 // Read out from "data to return" part of buffer. 213 if p.n == 0 { 214 return 0, p.err 215 } 216 n := len(d) 217 if n > p.n { 218 n = p.n 219 } 220 n, _ = br.Read(d[:n]) 221 p.total += int64(n) 222 p.n -= n 223 if p.n == 0 { 224 return n, p.err 225 } 226 return n, nil 227 } 228 229 // scanUntilBoundary scans buf to identify how much of it can be safely 230 // returned as part of the Part body. 231 // dashBoundary is "--boundary". 232 // nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. 233 // The comments below (and the name) assume "\n--boundary", but either is accepted. 234 // total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. 235 // readErr is the read error, if any, that followed reading the bytes in buf. 236 // scanUntilBoundary returns the number of data bytes from buf that can be 237 // returned as part of the Part body and also the error to return (if any) 238 // once those data bytes are done. 239 func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { 240 if total == 0 { 241 // At beginning of body, allow dashBoundary. 242 if bytes.HasPrefix(buf, dashBoundary) { 243 switch matchAfterPrefix(buf, dashBoundary, readErr) { 244 case -1: 245 return len(dashBoundary), nil 246 case 0: 247 return 0, nil 248 case +1: 249 return 0, io.EOF 250 } 251 } 252 if bytes.HasPrefix(dashBoundary, buf) { 253 return 0, readErr 254 } 255 } 256 257 // Search for "\n--boundary". 258 if i := bytes.Index(buf, nlDashBoundary); i >= 0 { 259 switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { 260 case -1: 261 return i + len(nlDashBoundary), nil 262 case 0: 263 return i, nil 264 case +1: 265 return i, io.EOF 266 } 267 } 268 if bytes.HasPrefix(nlDashBoundary, buf) { 269 return 0, readErr 270 } 271 272 // Otherwise, anything up to the final \n is not part of the boundary 273 // and so must be part of the body. 274 // Also if the section from the final \n onward is not a prefix of the boundary, 275 // it too must be part of the body. 276 i := bytes.LastIndexByte(buf, nlDashBoundary[0]) 277 if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { 278 return i, nil 279 } 280 return len(buf), readErr 281 } 282 283 // matchAfterPrefix checks whether buf should be considered to match the boundary. 284 // The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary", 285 // and the caller has verified already that bytes.HasPrefix(buf, prefix) is true. 286 // 287 // matchAfterPrefix returns +1 if the buffer does match the boundary, 288 // meaning the prefix is followed by a double dash, space, tab, cr, nl, 289 // or end of input. 290 // It returns -1 if the buffer definitely does NOT match the boundary, 291 // meaning the prefix is followed by some other character. 292 // For example, "--foobar" does not match "--foo". 293 // It returns 0 more input needs to be read to make the decision, 294 // meaning that len(buf) == len(prefix) and readErr == nil. 295 func matchAfterPrefix(buf, prefix []byte, readErr error) int { 296 if len(buf) == len(prefix) { 297 if readErr != nil { 298 return +1 299 } 300 return 0 301 } 302 c := buf[len(prefix)] 303 304 if c == ' ' || c == '\t' || c == '\r' || c == '\n' { 305 return +1 306 } 307 308 // Try to detect boundaryDash 309 if c == '-' { 310 if len(buf) == len(prefix)+1 { 311 if readErr != nil { 312 // Prefix + "-" does not match 313 return -1 314 } 315 return 0 316 } 317 if buf[len(prefix)+1] == '-' { 318 return +1 319 } 320 } 321 322 return -1 323 } 324 325 func (p *Part) Close() error { 326 io.Copy(io.Discard, p) 327 return nil 328 } 329 330 // Reader is an iterator over parts in a MIME multipart body. 331 // Reader's underlying parser consumes its input as needed. Seeking 332 // isn't supported. 333 type Reader struct { 334 bufReader *bufio.Reader 335 tempDir string // used in tests 336 337 currentPart *Part 338 partsRead int 339 340 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 341 nlDashBoundary []byte // nl + "--boundary" 342 dashBoundaryDash []byte // "--boundary--" 343 dashBoundary []byte // "--boundary" 344 } 345 346 // maxMIMEHeaderSize is the maximum size of a MIME header we will parse, 347 // including header keys, values, and map overhead. 348 const maxMIMEHeaderSize = 10 << 20 349 350 // multipartmaxheaders is the maximum number of header entries NextPart will return, 351 // as well as the maximum combined total of header entries Reader.ReadForm will return 352 // in FileHeaders. 353 var multipartmaxheaders = godebug.New("multipartmaxheaders") 354 355 func maxMIMEHeaders() int64 { 356 if s := multipartmaxheaders.Value(); s != "" { 357 if v, err := strconv.ParseInt(s, 10, 64); err == nil && v >= 0 { 358 multipartmaxheaders.IncNonDefault() 359 return v 360 } 361 } 362 return 10000 363 } 364 365 // NextPart returns the next part in the multipart or an error. 366 // When there are no more parts, the error [io.EOF] is returned. 367 // 368 // As a special case, if the "Content-Transfer-Encoding" header 369 // has a value of "quoted-printable", that header is instead 370 // hidden and the body is transparently decoded during Read calls. 371 func (r *Reader) NextPart() (*Part, error) { 372 return r.nextPart(false, maxMIMEHeaderSize, maxMIMEHeaders()) 373 } 374 375 // NextRawPart returns the next part in the multipart or an error. 376 // When there are no more parts, the error [io.EOF] is returned. 377 // 378 // Unlike [Reader.NextPart], it does not have special handling for 379 // "Content-Transfer-Encoding: quoted-printable". 380 func (r *Reader) NextRawPart() (*Part, error) { 381 return r.nextPart(true, maxMIMEHeaderSize, maxMIMEHeaders()) 382 } 383 384 func (r *Reader) nextPart(rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { 385 if r.currentPart != nil { 386 r.currentPart.Close() 387 } 388 if string(r.dashBoundary) == "--" { 389 return nil, fmt.Errorf("multipart: boundary is empty") 390 } 391 expectNewPart := false 392 for { 393 line, err := r.bufReader.ReadSlice('\n') 394 395 if err == io.EOF && r.isFinalBoundary(line) { 396 // If the buffer ends in "--boundary--" without the 397 // trailing "\r\n", ReadSlice will return an error 398 // (since it's missing the '\n'), but this is a valid 399 // multipart EOF so we need to return io.EOF instead of 400 // a fmt-wrapped one. 401 return nil, io.EOF 402 } 403 if err != nil { 404 return nil, fmt.Errorf("multipart: NextPart: %w", err) 405 } 406 407 if r.isBoundaryDelimiterLine(line) { 408 r.partsRead++ 409 bp, err := newPart(r, rawPart, maxMIMEHeaderSize, maxMIMEHeaders) 410 if err != nil { 411 return nil, err 412 } 413 r.currentPart = bp 414 return bp, nil 415 } 416 417 if r.isFinalBoundary(line) { 418 // Expected EOF 419 return nil, io.EOF 420 } 421 422 if expectNewPart { 423 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 424 } 425 426 if r.partsRead == 0 { 427 // skip line 428 continue 429 } 430 431 // Consume the "\n" or "\r\n" separator between the 432 // body of the previous part and the boundary line we 433 // now expect will follow. (either a new part or the 434 // end boundary) 435 if bytes.Equal(line, r.nl) { 436 expectNewPart = true 437 continue 438 } 439 440 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 441 } 442 } 443 444 // isFinalBoundary reports whether line is the final boundary line 445 // indicating that all parts are over. 446 // It matches `^--boundary--[ \t]*(\r\n)?$` 447 func (r *Reader) isFinalBoundary(line []byte) bool { 448 if !bytes.HasPrefix(line, r.dashBoundaryDash) { 449 return false 450 } 451 rest := line[len(r.dashBoundaryDash):] 452 rest = skipLWSPChar(rest) 453 return len(rest) == 0 || bytes.Equal(rest, r.nl) 454 } 455 456 func (r *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 457 // https://tools.ietf.org/html/rfc2046#section-5.1 458 // The boundary delimiter line is then defined as a line 459 // consisting entirely of two hyphen characters ("-", 460 // decimal value 45) followed by the boundary parameter 461 // value from the Content-Type header field, optional linear 462 // whitespace, and a terminating CRLF. 463 if !bytes.HasPrefix(line, r.dashBoundary) { 464 return false 465 } 466 rest := line[len(r.dashBoundary):] 467 rest = skipLWSPChar(rest) 468 469 // On the first part, see our lines are ending in \n instead of \r\n 470 // and switch into that mode if so. This is a violation of the spec, 471 // but occurs in practice. 472 if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 473 r.nl = r.nl[1:] 474 r.nlDashBoundary = r.nlDashBoundary[1:] 475 } 476 return bytes.Equal(rest, r.nl) 477 } 478 479 // skipLWSPChar returns b with leading spaces and tabs removed. 480 // RFC 822 defines: 481 // 482 // LWSP-char = SPACE / HTAB 483 func skipLWSPChar(b []byte) []byte { 484 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 485 b = b[1:] 486 } 487 return b 488 }