// Source: github.com/euank/go@v0.0.0-20160829210321-495514729181/src/mime/multipart/multipart.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

/*
Package multipart implements MIME multipart parsing, as defined in RFC
2046.

The implementation is sufficient for HTTP (RFC 2388) and the multipart
bodies generated by popular browsers.
*/
package multipart

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"mime"
	"mime/quotedprintable"
	"net/textproto"
)

// emptyParams is the map stored in Part.dispositionParams when the
// Content-Disposition header cannot be parsed, so that the field is
// non-nil once parsing has been attempted.
var emptyParams = make(map[string]string)

// This constant needs to be at least 76 for this package to work correctly.
// This is because \r\n--separator_of_len_70- would fill the buffer and it
// wouldn't be safe to consume a single byte from it.
const peekBufferSize = 4096

// A Part represents a single part in a multipart body.
type Part struct {
	// The headers of the body, if any, with the keys canonicalized
	// in the same fashion that the Go http.Request headers are.
	// For example, "foo-bar" changes case to "Foo-Bar"
	//
	// As a special case, if the "Content-Transfer-Encoding" header
	// has a value of "quoted-printable", that header is instead
	// hidden from this map and the body is transparently decoded
	// during Read calls.
	Header textproto.MIMEHeader

	// buffer holds body bytes already copied out of mr's buffered
	// reader but not yet handed to the caller of Read.
	buffer *bytes.Buffer
	// mr is the Reader this part was created from.
	mr *Reader
	// bytesRead is the running total of raw body bytes returned by
	// the part's underlying reader.
	bytesRead int

	// disposition and dispositionParams cache the parsed
	// Content-Disposition header. They are filled in lazily; a nil
	// dispositionParams means the header has not been parsed yet.
	disposition       string
	dispositionParams map[string]string

	// r is either a reader directly reading from mr, or it's a
	// wrapper around such a reader, decoding the
	// Content-Transfer-Encoding
	r io.Reader
}

// FormName returns the name parameter if p has a Content-Disposition
// of type "form-data". Otherwise it returns the empty string.
60 func (p *Part) FormName() string { 61 // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF 62 // of Content-Disposition value format. 63 if p.dispositionParams == nil { 64 p.parseContentDisposition() 65 } 66 if p.disposition != "form-data" { 67 return "" 68 } 69 return p.dispositionParams["name"] 70 } 71 72 // FileName returns the filename parameter of the Part's 73 // Content-Disposition header. 74 func (p *Part) FileName() string { 75 if p.dispositionParams == nil { 76 p.parseContentDisposition() 77 } 78 return p.dispositionParams["filename"] 79 } 80 81 func (p *Part) parseContentDisposition() { 82 v := p.Header.Get("Content-Disposition") 83 var err error 84 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 85 if err != nil { 86 p.dispositionParams = emptyParams 87 } 88 } 89 90 // NewReader creates a new multipart Reader reading from r using the 91 // given MIME boundary. 92 // 93 // The boundary is usually obtained from the "boundary" parameter of 94 // the message's "Content-Type" header. Use mime.ParseMediaType to 95 // parse such headers. 96 func NewReader(r io.Reader, boundary string) *Reader { 97 b := []byte("\r\n--" + boundary + "--") 98 return &Reader{ 99 bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), 100 nl: b[:2], 101 nlDashBoundary: b[:len(b)-2], 102 dashBoundaryDash: b[2:], 103 dashBoundary: b[2 : len(b)-2], 104 } 105 } 106 107 // stickyErrorReader is an io.Reader which never calls Read on its 108 // underlying Reader once an error has been seen. 
(the io.Reader 109 // interface's contract promises nothing about the return values of 110 // Read calls after an error, yet this package does do multiple Reads 111 // after error) 112 type stickyErrorReader struct { 113 r io.Reader 114 err error 115 } 116 117 func (r *stickyErrorReader) Read(p []byte) (n int, _ error) { 118 if r.err != nil { 119 return 0, r.err 120 } 121 n, r.err = r.r.Read(p) 122 return n, r.err 123 } 124 125 func newPart(mr *Reader) (*Part, error) { 126 bp := &Part{ 127 Header: make(map[string][]string), 128 mr: mr, 129 buffer: new(bytes.Buffer), 130 } 131 if err := bp.populateHeaders(); err != nil { 132 return nil, err 133 } 134 bp.r = partReader{bp} 135 const cte = "Content-Transfer-Encoding" 136 if bp.Header.Get(cte) == "quoted-printable" { 137 bp.Header.Del(cte) 138 bp.r = quotedprintable.NewReader(bp.r) 139 } 140 return bp, nil 141 } 142 143 func (bp *Part) populateHeaders() error { 144 r := textproto.NewReader(bp.mr.bufReader) 145 header, err := r.ReadMIMEHeader() 146 if err == nil { 147 bp.Header = header 148 } 149 return err 150 } 151 152 // Read reads the body of a part, after its headers and before the 153 // next part (if any) begins. 154 func (p *Part) Read(d []byte) (n int, err error) { 155 return p.r.Read(d) 156 } 157 158 // partReader implements io.Reader by reading raw bytes directly from the 159 // wrapped *Part, without doing any Transfer-Encoding decoding. 160 type partReader struct { 161 p *Part 162 } 163 164 func (pr partReader) Read(d []byte) (n int, err error) { 165 p := pr.p 166 defer func() { 167 p.bytesRead += n 168 }() 169 if p.buffer.Len() >= len(d) { 170 // Internal buffer of unconsumed data is large enough for 171 // the read request. No need to parse more at the moment. 172 return p.buffer.Read(d) 173 } 174 peek, err := p.mr.bufReader.Peek(peekBufferSize) // TODO(bradfitz): add buffer size accessor 175 176 // Look for an immediate empty part without a leading \r\n 177 // before the boundary separator. 
Some MIME code makes empty 178 // parts like this. Most browsers, however, write the \r\n 179 // before the subsequent boundary even for empty parts and 180 // won't hit this path. 181 if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { 182 return 0, io.EOF 183 } 184 unexpectedEOF := err == io.EOF 185 if err != nil && !unexpectedEOF { 186 return 0, fmt.Errorf("multipart: Part Read: %v", err) 187 } 188 if peek == nil { 189 panic("nil peek buf") 190 } 191 // Search the peek buffer for "\r\n--boundary". If found, 192 // consume everything up to the boundary. If not, consume only 193 // as much of the peek buffer as cannot hold the boundary 194 // string. 195 nCopy := 0 196 foundBoundary := false 197 if idx, isEnd := p.mr.peekBufferSeparatorIndex(peek); idx != -1 { 198 nCopy = idx 199 foundBoundary = isEnd 200 if !isEnd && nCopy == 0 { 201 nCopy = 1 // make some progress. 202 } 203 } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { 204 nCopy = safeCount 205 } else if unexpectedEOF { 206 // If we've run out of peek buffer and the boundary 207 // wasn't found (and can't possibly fit), we must have 208 // hit the end of the file unexpectedly. 209 return 0, io.ErrUnexpectedEOF 210 } 211 if nCopy > 0 { 212 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil { 213 return 0, err 214 } 215 } 216 n, err = p.buffer.Read(d) 217 if err == io.EOF && !foundBoundary { 218 // If the boundary hasn't been reached there's more to 219 // read, so don't pass through an EOF from the buffer 220 err = nil 221 } 222 return 223 } 224 225 func (p *Part) Close() error { 226 io.Copy(ioutil.Discard, p) 227 return nil 228 } 229 230 // Reader is an iterator over parts in a MIME multipart body. 231 // Reader's underlying parser consumes its input as needed. Seeking 232 // isn't supported. 
233 type Reader struct { 234 bufReader *bufio.Reader 235 236 currentPart *Part 237 partsRead int 238 239 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 240 nlDashBoundary []byte // nl + "--boundary" 241 dashBoundaryDash []byte // "--boundary--" 242 dashBoundary []byte // "--boundary" 243 } 244 245 // NextPart returns the next part in the multipart or an error. 246 // When there are no more parts, the error io.EOF is returned. 247 func (r *Reader) NextPart() (*Part, error) { 248 if r.currentPart != nil { 249 r.currentPart.Close() 250 } 251 252 expectNewPart := false 253 for { 254 line, err := r.bufReader.ReadSlice('\n') 255 256 if err == io.EOF && r.isFinalBoundary(line) { 257 // If the buffer ends in "--boundary--" without the 258 // trailing "\r\n", ReadSlice will return an error 259 // (since it's missing the '\n'), but this is a valid 260 // multipart EOF so we need to return io.EOF instead of 261 // a fmt-wrapped one. 262 return nil, io.EOF 263 } 264 if err != nil { 265 return nil, fmt.Errorf("multipart: NextPart: %v", err) 266 } 267 268 if r.isBoundaryDelimiterLine(line) { 269 r.partsRead++ 270 bp, err := newPart(r) 271 if err != nil { 272 return nil, err 273 } 274 r.currentPart = bp 275 return bp, nil 276 } 277 278 if r.isFinalBoundary(line) { 279 // Expected EOF 280 return nil, io.EOF 281 } 282 283 if expectNewPart { 284 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 285 } 286 287 if r.partsRead == 0 { 288 // skip line 289 continue 290 } 291 292 // Consume the "\n" or "\r\n" separator between the 293 // body of the previous part and the boundary line we 294 // now expect will follow. 
(either a new part or the 295 // end boundary) 296 if bytes.Equal(line, r.nl) { 297 expectNewPart = true 298 continue 299 } 300 301 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 302 } 303 } 304 305 // isFinalBoundary reports whether line is the final boundary line 306 // indicating that all parts are over. 307 // It matches `^--boundary--[ \t]*(\r\n)?$` 308 func (mr *Reader) isFinalBoundary(line []byte) bool { 309 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 310 return false 311 } 312 rest := line[len(mr.dashBoundaryDash):] 313 rest = skipLWSPChar(rest) 314 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 315 } 316 317 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 318 // http://tools.ietf.org/html/rfc2046#section-5.1 319 // The boundary delimiter line is then defined as a line 320 // consisting entirely of two hyphen characters ("-", 321 // decimal value 45) followed by the boundary parameter 322 // value from the Content-Type header field, optional linear 323 // whitespace, and a terminating CRLF. 324 if !bytes.HasPrefix(line, mr.dashBoundary) { 325 return false 326 } 327 rest := line[len(mr.dashBoundary):] 328 rest = skipLWSPChar(rest) 329 330 // On the first part, see our lines are ending in \n instead of \r\n 331 // and switch into that mode if so. This is a violation of the spec, 332 // but occurs in practice. 333 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 334 mr.nl = mr.nl[1:] 335 mr.nlDashBoundary = mr.nlDashBoundary[1:] 336 } 337 return bytes.Equal(rest, mr.nl) 338 } 339 340 // peekBufferIsEmptyPart reports whether the provided peek-ahead 341 // buffer represents an empty part. It is called only if we've not 342 // already read any bytes in this part and checks for the case of MIME 343 // software not writing the \r\n on empty parts. Some does, some 344 // doesn't. 
345 // 346 // This checks that what follows the "--boundary" is actually the end 347 // ("--boundary--" with optional whitespace) or optional whitespace 348 // and then a newline, so we don't catch "--boundaryFAKE", in which 349 // case the whole line is part of the data. 350 func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool { 351 // End of parts case. 352 // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)` 353 if bytes.HasPrefix(peek, mr.dashBoundaryDash) { 354 rest := peek[len(mr.dashBoundaryDash):] 355 rest = skipLWSPChar(rest) 356 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0 357 } 358 if !bytes.HasPrefix(peek, mr.dashBoundary) { 359 return false 360 } 361 // Test whether rest matches `^[ \t]*\r\n`) 362 rest := peek[len(mr.dashBoundary):] 363 rest = skipLWSPChar(rest) 364 return bytes.HasPrefix(rest, mr.nl) 365 } 366 367 // peekBufferSeparatorIndex returns the index of mr.nlDashBoundary in 368 // peek and whether it is a real boundary (and not a prefix of an 369 // unrelated separator). To be the end, the peek buffer must contain a 370 // newline after the boundary or contain the ending boundary (--separator--). 371 func (mr *Reader) peekBufferSeparatorIndex(peek []byte) (idx int, isEnd bool) { 372 idx = bytes.Index(peek, mr.nlDashBoundary) 373 if idx == -1 { 374 return 375 } 376 377 peek = peek[idx+len(mr.nlDashBoundary):] 378 if len(peek) == 0 || len(peek) == 1 && peek[0] == '-' { 379 return idx, false 380 } 381 if len(peek) > 1 && peek[0] == '-' && peek[1] == '-' { 382 return idx, true 383 } 384 peek = skipLWSPChar(peek) 385 // Don't have a complete line after the peek. 386 if bytes.IndexByte(peek, '\n') == -1 { 387 return idx, false 388 } 389 if len(peek) > 0 && peek[0] == '\n' { 390 return idx, true 391 } 392 if len(peek) > 1 && peek[0] == '\r' && peek[1] == '\n' { 393 return idx, true 394 } 395 return idx, false 396 } 397 398 // skipLWSPChar returns b with leading spaces and tabs removed. 
// RFC 822 defines:
//    LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
	// Find the first byte that is neither a space nor a tab, then
	// return the remainder. The result shares b's backing array.
	i := 0
	for i < len(b) && (b[i] == ' ' || b[i] == '\t') {
		i++
	}
	return b[i:]
}