github.com/lovishpuri/go-40569/src@v0.0.0-20230519171745-f8623e7c56cf/mime/multipart/multipart.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 // 5 6 /* 7 Package multipart implements MIME multipart parsing, as defined in RFC 8 2046. 9 10 The implementation is sufficient for HTTP (RFC 2388) and the multipart 11 bodies generated by popular browsers. 12 13 # Limits 14 15 To protect against malicious inputs, this package sets limits on the size 16 of the MIME data it processes. 17 18 Reader.NextPart and Reader.NextRawPart limit the number of headers in a 19 part to 10000 and Reader.ReadForm limits the total number of headers in all 20 FileHeaders to 10000. 21 These limits may be adjusted with the GODEBUG=multipartmaxheaders=<values> 22 setting. 23 24 Reader.ReadForm further limits the number of parts in a form to 1000. 25 This limit may be adjusted with the GODEBUG=multipartmaxparts=<value> 26 setting. 27 */ 28 package multipart 29 30 import ( 31 "bufio" 32 "bytes" 33 "fmt" 34 "internal/godebug" 35 "io" 36 "mime" 37 "mime/quotedprintable" 38 "net/textproto" 39 "path/filepath" 40 "strconv" 41 "strings" 42 ) 43 44 var emptyParams = make(map[string]string) 45 46 // This constant needs to be at least 76 for this package to work correctly. 47 // This is because \r\n--separator_of_len_70- would fill the buffer and it 48 // wouldn't be safe to consume a single byte from it. 49 const peekBufferSize = 4096 50 51 // A Part represents a single part in a multipart body. 52 type Part struct { 53 // The headers of the body, if any, with the keys canonicalized 54 // in the same fashion that the Go http.Request headers are. 55 // For example, "foo-bar" changes case to "Foo-Bar" 56 Header textproto.MIMEHeader 57 58 mr *Reader 59 60 disposition string 61 dispositionParams map[string]string 62 63 // r is either a reader directly reading from mr, or it's a 64 // wrapper around such a reader, decoding the 65 // Content-Transfer-Encoding 66 r io.Reader 67 68 n int // known data bytes waiting in mr.bufReader 69 total int64 // total data bytes read already 70 err error // error to return when n == 0 71 readErr error // read error observed from mr.bufReader 72 } 73 74 // FormName returns the name parameter if p has a Content-Disposition 75 // of type "form-data". Otherwise it returns the empty string. 76 func (p *Part) FormName() string { 77 // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF 78 // of Content-Disposition value format. 79 if p.dispositionParams == nil { 80 p.parseContentDisposition() 81 } 82 if p.disposition != "form-data" { 83 return "" 84 } 85 return p.dispositionParams["name"] 86 } 87 88 // FileName returns the filename parameter of the Part's Content-Disposition 89 // header. If not empty, the filename is passed through filepath.Base (which is 90 // platform dependent) before being returned. 91 func (p *Part) FileName() string { 92 if p.dispositionParams == nil { 93 p.parseContentDisposition() 94 } 95 filename := p.dispositionParams["filename"] 96 if filename == "" { 97 return "" 98 } 99 // RFC 7578, Section 4.2 requires that if a filename is provided, the 100 // directory path information must not be used. 101 return filepath.Base(filename) 102 } 103 104 func (p *Part) parseContentDisposition() { 105 v := p.Header.Get("Content-Disposition") 106 var err error 107 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 108 if err != nil { 109 p.dispositionParams = emptyParams 110 } 111 } 112 113 // NewReader creates a new multipart Reader reading from r using the 114 // given MIME boundary. 115 // 116 // The boundary is usually obtained from the "boundary" parameter of 117 // the message's "Content-Type" header. Use mime.ParseMediaType to 118 // parse such headers. 119 func NewReader(r io.Reader, boundary string) *Reader { 120 b := []byte("\r\n--" + boundary + "--") 121 return &Reader{ 122 bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), 123 nl: b[:2], 124 nlDashBoundary: b[:len(b)-2], 125 dashBoundaryDash: b[2:], 126 dashBoundary: b[2 : len(b)-2], 127 } 128 } 129 130 // stickyErrorReader is an io.Reader which never calls Read on its 131 // underlying Reader once an error has been seen. (the io.Reader 132 // interface's contract promises nothing about the return values of 133 // Read calls after an error, yet this package does do multiple Reads 134 // after error) 135 type stickyErrorReader struct { 136 r io.Reader 137 err error 138 } 139 140 func (r *stickyErrorReader) Read(p []byte) (n int, _ error) { 141 if r.err != nil { 142 return 0, r.err 143 } 144 n, r.err = r.r.Read(p) 145 return n, r.err 146 } 147 148 func newPart(mr *Reader, rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { 149 bp := &Part{ 150 Header: make(map[string][]string), 151 mr: mr, 152 } 153 if err := bp.populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders); err != nil { 154 return nil, err 155 } 156 bp.r = partReader{bp} 157 158 // rawPart is used to switch between Part.NextPart and Part.NextRawPart. 159 if !rawPart { 160 const cte = "Content-Transfer-Encoding" 161 if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") { 162 bp.Header.Del(cte) 163 bp.r = quotedprintable.NewReader(bp.r) 164 } 165 } 166 return bp, nil 167 } 168 169 func (p *Part) populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders int64) error { 170 r := textproto.NewReader(p.mr.bufReader) 171 header, err := readMIMEHeader(r, maxMIMEHeaderSize, maxMIMEHeaders) 172 if err == nil { 173 p.Header = header 174 } 175 // TODO: Add a distinguishable error to net/textproto. 176 if err != nil && err.Error() == "message too large" { 177 err = ErrMessageTooLarge 178 } 179 return err 180 } 181 182 // Read reads the body of a part, after its headers and before the 183 // next part (if any) begins. 184 func (p *Part) Read(d []byte) (n int, err error) { 185 return p.r.Read(d) 186 } 187 188 // partReader implements io.Reader by reading raw bytes directly from the 189 // wrapped *Part, without doing any Transfer-Encoding decoding. 190 type partReader struct { 191 p *Part 192 } 193 194 func (pr partReader) Read(d []byte) (int, error) { 195 p := pr.p 196 br := p.mr.bufReader 197 198 // Read into buffer until we identify some data to return, 199 // or we find a reason to stop (boundary or read error). 200 for p.n == 0 && p.err == nil { 201 peek, _ := br.Peek(br.Buffered()) 202 p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) 203 if p.n == 0 && p.err == nil { 204 // Force buffered I/O to read more into buffer. 205 _, p.readErr = br.Peek(len(peek) + 1) 206 if p.readErr == io.EOF { 207 p.readErr = io.ErrUnexpectedEOF 208 } 209 } 210 } 211 212 // Read out from "data to return" part of buffer. 213 if p.n == 0 { 214 return 0, p.err 215 } 216 n := len(d) 217 if n > p.n { 218 n = p.n 219 } 220 n, _ = br.Read(d[:n]) 221 p.total += int64(n) 222 p.n -= n 223 if p.n == 0 { 224 return n, p.err 225 } 226 return n, nil 227 } 228 229 // scanUntilBoundary scans buf to identify how much of it can be safely 230 // returned as part of the Part body. 231 // dashBoundary is "--boundary". 232 // nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. 233 // The comments below (and the name) assume "\n--boundary", but either is accepted. 234 // total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. 235 // readErr is the read error, if any, that followed reading the bytes in buf. 236 // scanUntilBoundary returns the number of data bytes from buf that can be 237 // returned as part of the Part body and also the error to return (if any) 238 // once those data bytes are done. 239 func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { 240 if total == 0 { 241 // At beginning of body, allow dashBoundary. 242 if bytes.HasPrefix(buf, dashBoundary) { 243 switch matchAfterPrefix(buf, dashBoundary, readErr) { 244 case -1: 245 return len(dashBoundary), nil 246 case 0: 247 return 0, nil 248 case +1: 249 return 0, io.EOF 250 } 251 } 252 if bytes.HasPrefix(dashBoundary, buf) { 253 return 0, readErr 254 } 255 } 256 257 // Search for "\n--boundary". 258 if i := bytes.Index(buf, nlDashBoundary); i >= 0 { 259 switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { 260 case -1: 261 return i + len(nlDashBoundary), nil 262 case 0: 263 return i, nil 264 case +1: 265 return i, io.EOF 266 } 267 } 268 if bytes.HasPrefix(nlDashBoundary, buf) { 269 return 0, readErr 270 } 271 272 // Otherwise, anything up to the final \n is not part of the boundary 273 // and so must be part of the body. 274 // Also if the section from the final \n onward is not a prefix of the boundary, 275 // it too must be part of the body. 276 i := bytes.LastIndexByte(buf, nlDashBoundary[0]) 277 if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { 278 return i, nil 279 } 280 return len(buf), readErr 281 } 282 283 // matchAfterPrefix checks whether buf should be considered to match the boundary. 284 // The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary", 285 // and the caller has verified already that bytes.HasPrefix(buf, prefix) is true. 286 // 287 // matchAfterPrefix returns +1 if the buffer does match the boundary, 288 // meaning the prefix is followed by a double dash, space, tab, cr, nl, 289 // or end of input. 290 // It returns -1 if the buffer definitely does NOT match the boundary, 291 // meaning the prefix is followed by some other character. 292 // For example, "--foobar" does not match "--foo". 293 // It returns 0 more input needs to be read to make the decision, 294 // meaning that len(buf) == len(prefix) and readErr == nil. 295 func matchAfterPrefix(buf, prefix []byte, readErr error) int { 296 if len(buf) == len(prefix) { 297 if readErr != nil { 298 return +1 299 } 300 return 0 301 } 302 c := buf[len(prefix)] 303 304 if c == ' ' || c == '\t' || c == '\r' || c == '\n' { 305 return +1 306 } 307 308 // Try to detect boundaryDash 309 if c == '-' { 310 if len(buf) == len(prefix)+1 { 311 if readErr != nil { 312 // Prefix + "-" does not match 313 return -1 314 } 315 return 0 316 } 317 if buf[len(prefix)+1] == '-' { 318 return +1 319 } 320 } 321 322 return -1 323 } 324 325 func (p *Part) Close() error { 326 io.Copy(io.Discard, p) 327 return nil 328 } 329 330 // Reader is an iterator over parts in a MIME multipart body. 331 // Reader's underlying parser consumes its input as needed. Seeking 332 // isn't supported. 333 type Reader struct { 334 bufReader *bufio.Reader 335 tempDir string // used in tests 336 337 currentPart *Part 338 partsRead int 339 340 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 341 nlDashBoundary []byte // nl + "--boundary" 342 dashBoundaryDash []byte // "--boundary--" 343 dashBoundary []byte // "--boundary" 344 } 345 346 // maxMIMEHeaderSize is the maximum size of a MIME header we will parse, 347 // including header keys, values, and map overhead. 348 const maxMIMEHeaderSize = 10 << 20 349 350 // multipartMaxHeaders is the maximum number of header entries NextPart will return, 351 // as well as the maximum combined total of header entries Reader.ReadForm will return 352 // in FileHeaders. 353 var multipartMaxHeaders = godebug.New("multipartmaxheaders") 354 355 func maxMIMEHeaders() int64 { 356 if s := multipartMaxHeaders.Value(); s != "" { 357 if v, err := strconv.ParseInt(s, 10, 64); err == nil && v >= 0 { 358 multipartMaxHeaders.IncNonDefault() 359 return v 360 } 361 } 362 return 10000 363 } 364 365 // NextPart returns the next part in the multipart or an error. 366 // When there are no more parts, the error io.EOF is returned. 367 // 368 // As a special case, if the "Content-Transfer-Encoding" header 369 // has a value of "quoted-printable", that header is instead 370 // hidden and the body is transparently decoded during Read calls. 371 func (r *Reader) NextPart() (*Part, error) { 372 return r.nextPart(false, maxMIMEHeaderSize, maxMIMEHeaders()) 373 } 374 375 // NextRawPart returns the next part in the multipart or an error. 376 // When there are no more parts, the error io.EOF is returned. 377 // 378 // Unlike NextPart, it does not have special handling for 379 // "Content-Transfer-Encoding: quoted-printable". 380 func (r *Reader) NextRawPart() (*Part, error) { 381 return r.nextPart(true, maxMIMEHeaderSize, maxMIMEHeaders()) 382 } 383 384 func (r *Reader) nextPart(rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { 385 if r.currentPart != nil { 386 r.currentPart.Close() 387 } 388 if string(r.dashBoundary) == "--" { 389 return nil, fmt.Errorf("multipart: boundary is empty") 390 } 391 expectNewPart := false 392 for { 393 line, err := r.bufReader.ReadSlice('\n') 394 395 if err == io.EOF && r.isFinalBoundary(line) { 396 // If the buffer ends in "--boundary--" without the 397 // trailing "\r\n", ReadSlice will return an error 398 // (since it's missing the '\n'), but this is a valid 399 // multipart EOF so we need to return io.EOF instead of 400 // a fmt-wrapped one. 401 return nil, io.EOF 402 } 403 if err != nil { 404 return nil, fmt.Errorf("multipart: NextPart: %w", err) 405 } 406 407 if r.isBoundaryDelimiterLine(line) { 408 r.partsRead++ 409 bp, err := newPart(r, rawPart, maxMIMEHeaderSize, maxMIMEHeaders) 410 if err != nil { 411 return nil, err 412 } 413 r.currentPart = bp 414 return bp, nil 415 } 416 417 if r.isFinalBoundary(line) { 418 // Expected EOF 419 return nil, io.EOF 420 } 421 422 if expectNewPart { 423 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 424 } 425 426 if r.partsRead == 0 { 427 // skip line 428 continue 429 } 430 431 // Consume the "\n" or "\r\n" separator between the 432 // body of the previous part and the boundary line we 433 // now expect will follow. (either a new part or the 434 // end boundary) 435 if bytes.Equal(line, r.nl) { 436 expectNewPart = true 437 continue 438 } 439 440 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 441 } 442 } 443 444 // isFinalBoundary reports whether line is the final boundary line 445 // indicating that all parts are over. 446 // It matches `^--boundary--[ \t]*(\r\n)?$` 447 func (r *Reader) isFinalBoundary(line []byte) bool { 448 if !bytes.HasPrefix(line, r.dashBoundaryDash) { 449 return false 450 } 451 rest := line[len(r.dashBoundaryDash):] 452 rest = skipLWSPChar(rest) 453 return len(rest) == 0 || bytes.Equal(rest, r.nl) 454 } 455 456 func (r *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 457 // https://tools.ietf.org/html/rfc2046#section-5.1 458 // The boundary delimiter line is then defined as a line 459 // consisting entirely of two hyphen characters ("-", 460 // decimal value 45) followed by the boundary parameter 461 // value from the Content-Type header field, optional linear 462 // whitespace, and a terminating CRLF. 463 if !bytes.HasPrefix(line, r.dashBoundary) { 464 return false 465 } 466 rest := line[len(r.dashBoundary):] 467 rest = skipLWSPChar(rest) 468 469 // On the first part, see our lines are ending in \n instead of \r\n 470 // and switch into that mode if so. This is a violation of the spec, 471 // but occurs in practice. 472 if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 473 r.nl = r.nl[1:] 474 r.nlDashBoundary = r.nlDashBoundary[1:] 475 } 476 return bytes.Equal(rest, r.nl) 477 } 478 479 // skipLWSPChar returns b with leading spaces and tabs removed. 480 // RFC 822 defines: 481 // 482 // LWSP-char = SPACE / HTAB 483 func skipLWSPChar(b []byte) []byte { 484 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 485 b = b[1:] 486 } 487 return b 488 }