// Source: github.com/d4l3k/go@v0.0.0-20151015000803-65fc379daeda/src/mime/multipart/multipart.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

/*
Package multipart implements MIME multipart parsing, as defined in RFC
2046.

The implementation is sufficient for HTTP (RFC 2388) and the multipart
bodies generated by popular browsers.
*/
package multipart

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"mime"
	"mime/quotedprintable"
	"net/textproto"
)

// emptyParams is the shared, empty parameter map that
// parseContentDisposition installs as a Part's dispositionParams when
// the Content-Disposition header cannot be parsed. Being non-nil, it
// also marks the header as "already parsed".
var emptyParams = make(map[string]string)

// This constant needs to be at least 76 for this package to work correctly.
// This is because \r\n--separator_of_len_70- would fill the buffer and it
// wouldn't be safe to consume a single byte from it.
//
// It is used both as the size of the Reader's bufio.Reader and as the
// number of bytes requested from Peek when scanning a part body.
const peekBufferSize = 4096

// A Part represents a single part in a multipart body.
type Part struct {
	// The headers of the body, if any, with the keys canonicalized
	// in the same fashion that the Go http.Request headers are.
	// For example, "foo-bar" changes case to "Foo-Bar"
	//
	// As a special case, if the "Content-Transfer-Encoding" header
	// has a value of "quoted-printable", that header is instead
	// hidden from this map and the body is transparently decoded
	// during Read calls.
	Header textproto.MIMEHeader

	// buffer holds body bytes that have already been copied out of
	// mr.bufReader but not yet handed to a Read caller.
	buffer *bytes.Buffer
	// mr is the Reader this part was created from; the part's body is
	// read from mr.bufReader.
	mr *Reader
	// bytesRead is the running total of raw body bytes returned by the
	// part's reader. A value of zero is used to detect an empty part.
	bytesRead int

	// disposition and dispositionParams cache the parsed
	// Content-Disposition header. They are filled in lazily by
	// parseContentDisposition; dispositionParams is nil until then.
	disposition       string
	dispositionParams map[string]string

	// r is either a reader directly reading from mr, or it's a
	// wrapper around such a reader, decoding the
	// Content-Transfer-Encoding
	r io.Reader
}

// FormName returns the name parameter if p has a Content-Disposition
// of type "form-data". Otherwise it returns the empty string.
60 func (p *Part) FormName() string { 61 // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF 62 // of Content-Disposition value format. 63 if p.dispositionParams == nil { 64 p.parseContentDisposition() 65 } 66 if p.disposition != "form-data" { 67 return "" 68 } 69 return p.dispositionParams["name"] 70 } 71 72 // FileName returns the filename parameter of the Part's 73 // Content-Disposition header. 74 func (p *Part) FileName() string { 75 if p.dispositionParams == nil { 76 p.parseContentDisposition() 77 } 78 return p.dispositionParams["filename"] 79 } 80 81 func (p *Part) parseContentDisposition() { 82 v := p.Header.Get("Content-Disposition") 83 var err error 84 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 85 if err != nil { 86 p.dispositionParams = emptyParams 87 } 88 } 89 90 // NewReader creates a new multipart Reader reading from r using the 91 // given MIME boundary. 92 // 93 // The boundary is usually obtained from the "boundary" parameter of 94 // the message's "Content-Type" header. Use mime.ParseMediaType to 95 // parse such headers. 
96 func NewReader(r io.Reader, boundary string) *Reader { 97 b := []byte("\r\n--" + boundary + "--") 98 return &Reader{ 99 bufReader: bufio.NewReaderSize(r, peekBufferSize), 100 nl: b[:2], 101 nlDashBoundary: b[:len(b)-2], 102 dashBoundaryDash: b[2:], 103 dashBoundary: b[2 : len(b)-2], 104 } 105 } 106 107 func newPart(mr *Reader) (*Part, error) { 108 bp := &Part{ 109 Header: make(map[string][]string), 110 mr: mr, 111 buffer: new(bytes.Buffer), 112 } 113 if err := bp.populateHeaders(); err != nil { 114 return nil, err 115 } 116 bp.r = partReader{bp} 117 const cte = "Content-Transfer-Encoding" 118 if bp.Header.Get(cte) == "quoted-printable" { 119 bp.Header.Del(cte) 120 bp.r = quotedprintable.NewReader(bp.r) 121 } 122 return bp, nil 123 } 124 125 func (bp *Part) populateHeaders() error { 126 r := textproto.NewReader(bp.mr.bufReader) 127 header, err := r.ReadMIMEHeader() 128 if err == nil { 129 bp.Header = header 130 } 131 return err 132 } 133 134 // Read reads the body of a part, after its headers and before the 135 // next part (if any) begins. 136 func (p *Part) Read(d []byte) (n int, err error) { 137 return p.r.Read(d) 138 } 139 140 // partReader implements io.Reader by reading raw bytes directly from the 141 // wrapped *Part, without doing any Transfer-Encoding decoding. 142 type partReader struct { 143 p *Part 144 } 145 146 func (pr partReader) Read(d []byte) (n int, err error) { 147 p := pr.p 148 defer func() { 149 p.bytesRead += n 150 }() 151 if p.buffer.Len() >= len(d) { 152 // Internal buffer of unconsumed data is large enough for 153 // the read request. No need to parse more at the moment. 154 return p.buffer.Read(d) 155 } 156 peek, err := p.mr.bufReader.Peek(peekBufferSize) // TODO(bradfitz): add buffer size accessor 157 158 // Look for an immediate empty part without a leading \r\n 159 // before the boundary separator. Some MIME code makes empty 160 // parts like this. 
Most browsers, however, write the \r\n 161 // before the subsequent boundary even for empty parts and 162 // won't hit this path. 163 if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { 164 return 0, io.EOF 165 } 166 unexpectedEOF := err == io.EOF 167 if err != nil && !unexpectedEOF { 168 return 0, fmt.Errorf("multipart: Part Read: %v", err) 169 } 170 if peek == nil { 171 panic("nil peek buf") 172 } 173 // Search the peek buffer for "\r\n--boundary". If found, 174 // consume everything up to the boundary. If not, consume only 175 // as much of the peek buffer as cannot hold the boundary 176 // string. 177 nCopy := 0 178 foundBoundary := false 179 if idx, isEnd := p.mr.peekBufferSeparatorIndex(peek); idx != -1 { 180 nCopy = idx 181 foundBoundary = isEnd 182 if !isEnd && nCopy == 0 { 183 nCopy = 1 // make some progress. 184 } 185 } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { 186 nCopy = safeCount 187 } else if unexpectedEOF { 188 // If we've run out of peek buffer and the boundary 189 // wasn't found (and can't possibly fit), we must have 190 // hit the end of the file unexpectedly. 191 return 0, io.ErrUnexpectedEOF 192 } 193 if nCopy > 0 { 194 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil { 195 return 0, err 196 } 197 } 198 n, err = p.buffer.Read(d) 199 if err == io.EOF && !foundBoundary { 200 // If the boundary hasn't been reached there's more to 201 // read, so don't pass through an EOF from the buffer 202 err = nil 203 } 204 return 205 } 206 207 func (p *Part) Close() error { 208 io.Copy(ioutil.Discard, p) 209 return nil 210 } 211 212 // Reader is an iterator over parts in a MIME multipart body. 213 // Reader's underlying parser consumes its input as needed. Seeking 214 // isn't supported. 
215 type Reader struct { 216 bufReader *bufio.Reader 217 218 currentPart *Part 219 partsRead int 220 221 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 222 nlDashBoundary []byte // nl + "--boundary" 223 dashBoundaryDash []byte // "--boundary--" 224 dashBoundary []byte // "--boundary" 225 } 226 227 // NextPart returns the next part in the multipart or an error. 228 // When there are no more parts, the error io.EOF is returned. 229 func (r *Reader) NextPart() (*Part, error) { 230 if r.currentPart != nil { 231 r.currentPart.Close() 232 } 233 234 expectNewPart := false 235 for { 236 line, err := r.bufReader.ReadSlice('\n') 237 238 if err == io.EOF && r.isFinalBoundary(line) { 239 // If the buffer ends in "--boundary--" without the 240 // trailing "\r\n", ReadSlice will return an error 241 // (since it's missing the '\n'), but this is a valid 242 // multipart EOF so we need to return io.EOF instead of 243 // a fmt-wrapped one. 244 return nil, io.EOF 245 } 246 if err != nil { 247 return nil, fmt.Errorf("multipart: NextPart: %v", err) 248 } 249 250 if r.isBoundaryDelimiterLine(line) { 251 r.partsRead++ 252 bp, err := newPart(r) 253 if err != nil { 254 return nil, err 255 } 256 r.currentPart = bp 257 return bp, nil 258 } 259 260 if r.isFinalBoundary(line) { 261 // Expected EOF 262 return nil, io.EOF 263 } 264 265 if expectNewPart { 266 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 267 } 268 269 if r.partsRead == 0 { 270 // skip line 271 continue 272 } 273 274 // Consume the "\n" or "\r\n" separator between the 275 // body of the previous part and the boundary line we 276 // now expect will follow. 
(either a new part or the 277 // end boundary) 278 if bytes.Equal(line, r.nl) { 279 expectNewPart = true 280 continue 281 } 282 283 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 284 } 285 } 286 287 // isFinalBoundary reports whether line is the final boundary line 288 // indicating that all parts are over. 289 // It matches `^--boundary--[ \t]*(\r\n)?$` 290 func (mr *Reader) isFinalBoundary(line []byte) bool { 291 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 292 return false 293 } 294 rest := line[len(mr.dashBoundaryDash):] 295 rest = skipLWSPChar(rest) 296 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 297 } 298 299 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 300 // http://tools.ietf.org/html/rfc2046#section-5.1 301 // The boundary delimiter line is then defined as a line 302 // consisting entirely of two hyphen characters ("-", 303 // decimal value 45) followed by the boundary parameter 304 // value from the Content-Type header field, optional linear 305 // whitespace, and a terminating CRLF. 306 if !bytes.HasPrefix(line, mr.dashBoundary) { 307 return false 308 } 309 rest := line[len(mr.dashBoundary):] 310 rest = skipLWSPChar(rest) 311 312 // On the first part, see our lines are ending in \n instead of \r\n 313 // and switch into that mode if so. This is a violation of the spec, 314 // but occurs in practice. 315 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 316 mr.nl = mr.nl[1:] 317 mr.nlDashBoundary = mr.nlDashBoundary[1:] 318 } 319 return bytes.Equal(rest, mr.nl) 320 } 321 322 // peekBufferIsEmptyPart reports whether the provided peek-ahead 323 // buffer represents an empty part. It is called only if we've not 324 // already read any bytes in this part and checks for the case of MIME 325 // software not writing the \r\n on empty parts. Some does, some 326 // doesn't. 
327 // 328 // This checks that what follows the "--boundary" is actually the end 329 // ("--boundary--" with optional whitespace) or optional whitespace 330 // and then a newline, so we don't catch "--boundaryFAKE", in which 331 // case the whole line is part of the data. 332 func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool { 333 // End of parts case. 334 // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)` 335 if bytes.HasPrefix(peek, mr.dashBoundaryDash) { 336 rest := peek[len(mr.dashBoundaryDash):] 337 rest = skipLWSPChar(rest) 338 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0 339 } 340 if !bytes.HasPrefix(peek, mr.dashBoundary) { 341 return false 342 } 343 // Test whether rest matches `^[ \t]*\r\n`) 344 rest := peek[len(mr.dashBoundary):] 345 rest = skipLWSPChar(rest) 346 return bytes.HasPrefix(rest, mr.nl) 347 } 348 349 // peekBufferSeparatorIndex returns the index of mr.nlDashBoundary in 350 // peek and whether it is a real boundary (and not a prefix of an 351 // unrelated separator). To be the end, the peek buffer must contain a 352 // newline after the boundary or contain the ending boundary (--separator--). 353 func (mr *Reader) peekBufferSeparatorIndex(peek []byte) (idx int, isEnd bool) { 354 idx = bytes.Index(peek, mr.nlDashBoundary) 355 if idx == -1 { 356 return 357 } 358 359 peek = peek[idx+len(mr.nlDashBoundary):] 360 if len(peek) == 0 || len(peek) == 1 && peek[0] == '-' { 361 return idx, false 362 } 363 if len(peek) > 1 && peek[0] == '-' && peek[1] == '-' { 364 return idx, true 365 } 366 peek = skipLWSPChar(peek) 367 // Don't have a complete line after the peek. 368 if bytes.IndexByte(peek, '\n') == -1 { 369 return idx, false 370 } 371 if len(peek) > 0 && peek[0] == '\n' { 372 return idx, true 373 } 374 if len(peek) > 1 && peek[0] == '\r' && peek[1] == '\n' { 375 return idx, true 376 } 377 return idx, false 378 } 379 380 // skipLWSPChar returns b with leading spaces and tabs removed. 
// RFC 822 defines:
//    LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
	// Advance past the leading run of spaces and tabs, then re-slice
	// once; the result shares b's backing array.
	start := 0
	for start < len(b) {
		if c := b[start]; c != ' ' && c != '\t' {
			break
		}
		start++
	}
	return b[start:]
}