// Source: github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/mime/multipart/multipart.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

/*
Package multipart implements MIME multipart parsing, as defined in RFC
2046.

The implementation is sufficient for HTTP (RFC 2388) and the multipart
bodies generated by popular browsers.
*/
package multipart

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"mime"
	"mime/quotedprintable"
	"net/textproto"
	"strings"
)

// emptyParams is the shared, never-written parameter map stored on a Part
// whose Content-Disposition header could not be parsed. Being non-nil, it
// marks the header as "already parsed" so it is not re-parsed on every call.
var emptyParams = make(map[string]string)

// This constant needs to be at least 76 for this package to work correctly.
// This is because \r\n--separator_of_len_70- would fill the buffer and it
// wouldn't be safe to consume a single byte from it.
const peekBufferSize = 4096

// A Part represents a single part in a multipart body.
type Part struct {
	// The headers of the body, if any, with the keys canonicalized
	// in the same fashion that the Go http.Request headers are.
	// For example, "foo-bar" changes case to "Foo-Bar"
	Header textproto.MIMEHeader

	mr *Reader

	disposition       string
	dispositionParams map[string]string

	// r is either a reader directly reading from mr, or it's a
	// wrapper around such a reader, decoding the
	// Content-Transfer-Encoding
	r io.Reader

	n       int   // known data bytes waiting in mr.bufReader
	total   int64 // total data bytes read already
	err     error // error to return when n == 0
	readErr error // read error observed from mr.bufReader
}

// FormName returns the name parameter if p has a Content-Disposition
// of type "form-data". Otherwise it returns the empty string.
func (p *Part) FormName() string {
	// See https://tools.ietf.org/html/rfc2183 section 2 for EBNF
	// of Content-Disposition value format.
	if p.dispositionParams == nil {
		p.parseContentDisposition()
	}
	if p.disposition != "form-data" {
		return ""
	}
	return p.dispositionParams["name"]
}

// FileName returns the filename parameter of the Part's
// Content-Disposition header.
func (p *Part) FileName() string {
	if p.dispositionParams == nil {
		p.parseContentDisposition()
	}
	return p.dispositionParams["filename"]
}

// parseContentDisposition parses the Content-Disposition header once and
// caches the result in p.disposition and p.dispositionParams. If the header
// cannot be parsed, dispositionParams is set to the shared emptyParams map
// so that later lookups simply yield empty strings.
func (p *Part) parseContentDisposition() {
	v := p.Header.Get("Content-Disposition")
	var err error
	p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
	if err != nil {
		p.dispositionParams = emptyParams
	}
}

// NewReader creates a new multipart Reader reading from r using the
// given MIME boundary.
//
// The boundary is usually obtained from the "boundary" parameter of
// the message's "Content-Type" header. Use mime.ParseMediaType to
// parse such headers.
func NewReader(r io.Reader, boundary string) *Reader {
	// All delimiter variants are sub-slices of a single backing array.
	b := []byte("\r\n--" + boundary + "--")
	return &Reader{
		bufReader:        bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize),
		nl:               b[:2],
		nlDashBoundary:   b[:len(b)-2],
		dashBoundaryDash: b[2:],
		dashBoundary:     b[2 : len(b)-2],
	}
}

// stickyErrorReader is an io.Reader which never calls Read on its
// underlying Reader once an error has been seen. (the io.Reader
// interface's contract promises nothing about the return values of
// Read calls after an error, yet this package does do multiple Reads
// after error)
type stickyErrorReader struct {
	r   io.Reader
	err error
}

// Read forwards to the underlying reader until it reports an error; from
// then on it returns that same error without calling the underlying reader.
func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
	if r.err != nil {
		return 0, r.err
	}
	n, r.err = r.r.Read(p)
	return n, r.err
}

// newPart reads the headers of the next part from mr and returns a Part
// whose body can then be read. It returns the header-parsing error, if any.
func newPart(mr *Reader, rawPart bool) (*Part, error) {
	bp := &Part{
		Header: make(map[string][]string),
		mr:     mr,
	}
	if err := bp.populateHeaders(); err != nil {
		return nil, err
	}
	bp.r = partReader{bp}

	// rawPart is used to switch between Reader.NextPart and Reader.NextRawPart.
	if !rawPart {
		const cte = "Content-Transfer-Encoding"
		if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") {
			bp.Header.Del(cte)
			bp.r = quotedprintable.NewReader(bp.r)
		}
	}
	return bp, nil
}

// populateHeaders reads a MIME header block from the Reader's buffered
// input and stores it in p.Header. On error p.Header is left untouched.
func (p *Part) populateHeaders() error {
	r := textproto.NewReader(p.mr.bufReader)
	header, err := r.ReadMIMEHeader()
	if err == nil {
		p.Header = header
	}
	return err
}

// Read reads the body of a part, after its headers and before the
// next part (if any) begins.
func (p *Part) Read(d []byte) (n int, err error) {
	return p.r.Read(d)
}

// partReader implements io.Reader by reading raw bytes directly from the
// wrapped *Part, without doing any Transfer-Encoding decoding.
type partReader struct {
	p *Part
}

// Read copies body bytes of the wrapped Part into d, stopping at the next
// boundary. It returns io.EOF once the boundary is reached and
// io.ErrUnexpectedEOF if the input ends before a boundary is seen.
func (pr partReader) Read(d []byte) (int, error) {
	p := pr.p
	br := p.mr.bufReader

	// Read into buffer until we identify some data to return,
	// or we find a reason to stop (boundary or read error).
	for p.n == 0 && p.err == nil {
		peek, _ := br.Peek(br.Buffered())
		p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
		if p.n == 0 && p.err == nil {
			// Force buffered I/O to read more into buffer.
			_, p.readErr = br.Peek(len(peek) + 1)
			if p.readErr == io.EOF {
				p.readErr = io.ErrUnexpectedEOF
			}
		}
	}

	// Read out from "data to return" part of buffer.
	if p.n == 0 {
		return 0, p.err
	}
	n := len(d)
	if n > p.n {
		n = p.n
	}
	// The requested bytes were counted from already-buffered data (p.n),
	// so the error from this Read is deliberately ignored.
	n, _ = br.Read(d[:n])
	p.total += int64(n)
	p.n -= n
	if p.n == 0 {
		return n, p.err
	}
	return n, nil
}

// scanUntilBoundary scans buf to identify how much of it can be safely
// returned as part of the Part body.
// dashBoundary is "--boundary".
// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
// The comments below (and the name) assume "\n--boundary", but either is accepted.
// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
// readErr is the read error, if any, that followed reading the bytes in buf.
// scanUntilBoundary returns the number of data bytes from buf that can be
// returned as part of the Part body and also the error to return (if any)
// once those data bytes are done.
func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
	if total == 0 {
		// At beginning of body, allow dashBoundary.
		if bytes.HasPrefix(buf, dashBoundary) {
			switch matchAfterPrefix(buf, dashBoundary, readErr) {
			case -1:
				return len(dashBoundary), nil
			case 0:
				return 0, nil
			case +1:
				return 0, io.EOF
			}
		}
		if bytes.HasPrefix(dashBoundary, buf) {
			return 0, readErr
		}
	}

	// Search for "\n--boundary".
	if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
		switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
		case -1:
			return i + len(nlDashBoundary), nil
		case 0:
			return i, nil
		case +1:
			return i, io.EOF
		}
	}
	if bytes.HasPrefix(nlDashBoundary, buf) {
		return 0, readErr
	}

	// Otherwise, anything up to the final \n is not part of the boundary
	// and so must be part of the body.
	// Also if the section from the final \n onward is not a prefix of the boundary,
	// it too must be part of the body.
	i := bytes.LastIndexByte(buf, nlDashBoundary[0])
	if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
		return i, nil
	}
	return len(buf), readErr
}

// matchAfterPrefix checks whether buf should be considered to match the boundary.
// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
//
// matchAfterPrefix returns +1 if the buffer does match the boundary,
// meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input.
// It returns -1 if the buffer definitely does NOT match the boundary,
// meaning the prefix is followed by some other character.
// For example, "--foobar" does not match "--foo".
// It returns 0 if more input needs to be read to make the decision,
// meaning that len(buf) == len(prefix) and readErr == nil.
func matchAfterPrefix(buf, prefix []byte, readErr error) int {
	if len(buf) == len(prefix) {
		if readErr != nil {
			return +1
		}
		return 0
	}
	c := buf[len(prefix)]
	if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '-' {
		return +1
	}
	return -1
}

// Close reads and discards whatever remains of the part's body.
// Any error encountered while draining is dropped; Close always
// returns nil.
func (p *Part) Close() error {
	io.Copy(io.Discard, p)
	return nil
}

// Reader is an iterator over parts in a MIME multipart body.
// Reader's underlying parser consumes its input as needed. Seeking
// isn't supported.
type Reader struct {
	bufReader *bufio.Reader

	currentPart *Part
	partsRead   int

	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
	nlDashBoundary   []byte // nl + "--boundary"
	dashBoundaryDash []byte // "--boundary--"
	dashBoundary     []byte // "--boundary"
}

// NextPart returns the next part in the multipart or an error.
// When there are no more parts, the error io.EOF is returned.
//
// As a special case, if the "Content-Transfer-Encoding" header
// has a value of "quoted-printable", that header is instead
// hidden and the body is transparently decoded during Read calls.
func (r *Reader) NextPart() (*Part, error) {
	return r.nextPart(false)
}

// NextRawPart returns the next part in the multipart or an error.
// When there are no more parts, the error io.EOF is returned.
//
// Unlike NextPart, it does not have special handling for
// "Content-Transfer-Encoding: quoted-printable".
func (r *Reader) NextRawPart() (*Part, error) {
	return r.nextPart(true)
}

// nextPart drains the current part (if any), then reads lines until it
// finds the next boundary delimiter (returning a new Part) or the final
// boundary (returning io.EOF). Read errors from the underlying reader are
// wrapped with %w so callers can inspect them with errors.Is / errors.As.
func (r *Reader) nextPart(rawPart bool) (*Part, error) {
	if r.currentPart != nil {
		r.currentPart.Close()
	}
	if string(r.dashBoundary) == "--" {
		return nil, fmt.Errorf("multipart: boundary is empty")
	}
	expectNewPart := false
	for {
		line, err := r.bufReader.ReadSlice('\n')

		if err == io.EOF && r.isFinalBoundary(line) {
			// If the buffer ends in "--boundary--" without the
			// trailing "\r\n", ReadSlice will return an error
			// (since it's missing the '\n'), but this is a valid
			// multipart EOF so we need to return io.EOF instead of
			// a fmt-wrapped one.
			return nil, io.EOF
		}
		if err != nil {
			return nil, fmt.Errorf("multipart: NextPart: %w", err)
		}

		if r.isBoundaryDelimiterLine(line) {
			r.partsRead++
			bp, err := newPart(r, rawPart)
			if err != nil {
				return nil, err
			}
			r.currentPart = bp
			return bp, nil
		}

		if r.isFinalBoundary(line) {
			// Expected EOF
			return nil, io.EOF
		}

		if expectNewPart {
			return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
		}

		if r.partsRead == 0 {
			// skip line
			continue
		}

		// Consume the "\n" or "\r\n" separator between the
		// body of the previous part and the boundary line we
		// now expect will follow. (either a new part or the
		// end boundary)
		if bytes.Equal(line, r.nl) {
			expectNewPart = true
			continue
		}

		return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
	}
}

// isFinalBoundary reports whether line is the final boundary line
// indicating that all parts are over.
// It matches `^--boundary--[ \t]*(\r\n)?$`
func (r *Reader) isFinalBoundary(line []byte) bool {
	if !bytes.HasPrefix(line, r.dashBoundaryDash) {
		return false
	}
	rest := line[len(r.dashBoundaryDash):]
	rest = skipLWSPChar(rest)
	return len(rest) == 0 || bytes.Equal(rest, r.nl)
}

// isBoundaryDelimiterLine reports whether line is a (non-final) boundary
// delimiter line. As a side effect, the first time it sees a boundary line
// terminated by a bare "\n" it switches the Reader into "\n" line-ending mode.
func (r *Reader) isBoundaryDelimiterLine(line []byte) bool {
	// https://tools.ietf.org/html/rfc2046#section-5.1
	//   The boundary delimiter line is then defined as a line
	//   consisting entirely of two hyphen characters ("-",
	//   decimal value 45) followed by the boundary parameter
	//   value from the Content-Type header field, optional linear
	//   whitespace, and a terminating CRLF.
	if !bytes.HasPrefix(line, r.dashBoundary) {
		return false
	}
	rest := line[len(r.dashBoundary):]
	rest = skipLWSPChar(rest)

	// On the first part, check whether our lines are ending in \n instead
	// of \r\n and switch into that mode if so. This is a violation of the
	// spec, but occurs in practice.
	if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
		r.nl = r.nl[1:]
		r.nlDashBoundary = r.nlDashBoundary[1:]
	}
	return bytes.Equal(rest, r.nl)
}

// skipLWSPChar returns b with leading spaces and tabs removed.
// RFC 822 defines:
//    LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
	for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
		b = b[1:]
	}
	return b
}