github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/mime/multipart/multipart.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 // 5 6 /* 7 Package multipart implements MIME multipart parsing, as defined in RFC 8 2046. 9 10 The implementation is sufficient for HTTP (RFC 2388) and the multipart 11 bodies generated by popular browsers. 12 */ 13 package multipart 14 15 import ( 16 "bufio" 17 "bytes" 18 "fmt" 19 "io" 20 "io/ioutil" 21 "mime" 22 "mime/internal/quotedprintable" 23 "net/textproto" 24 ) 25 26 var emptyParams = make(map[string]string) 27 28 // A Part represents a single part in a multipart body. 29 type Part struct { 30 // The headers of the body, if any, with the keys canonicalized 31 // in the same fashion that the Go http.Request headers are. 32 // For example, "foo-bar" changes case to "Foo-Bar" 33 // 34 // As a special case, if the "Content-Transfer-Encoding" header 35 // has a value of "quoted-printable", that header is instead 36 // hidden from this map and the body is transparently decoded 37 // during Read calls. 38 Header textproto.MIMEHeader 39 40 buffer *bytes.Buffer 41 mr *Reader 42 bytesRead int 43 44 disposition string 45 dispositionParams map[string]string 46 47 // r is either a reader directly reading from mr, or it's a 48 // wrapper around such a reader, decoding the 49 // Content-Transfer-Encoding 50 r io.Reader 51 } 52 53 // FormName returns the name parameter if p has a Content-Disposition 54 // of type "form-data". Otherwise it returns the empty string. 55 func (p *Part) FormName() string { 56 // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF 57 // of Content-Disposition value format. 58 if p.dispositionParams == nil { 59 p.parseContentDisposition() 60 } 61 if p.disposition != "form-data" { 62 return "" 63 } 64 return p.dispositionParams["name"] 65 } 66 67 // FileName returns the filename parameter of the Part's 68 // Content-Disposition header. 69 func (p *Part) FileName() string { 70 if p.dispositionParams == nil { 71 p.parseContentDisposition() 72 } 73 return p.dispositionParams["filename"] 74 } 75 76 func (p *Part) parseContentDisposition() { 77 v := p.Header.Get("Content-Disposition") 78 var err error 79 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 80 if err != nil { 81 p.dispositionParams = emptyParams 82 } 83 } 84 85 // NewReader creates a new multipart Reader reading from r using the 86 // given MIME boundary. 87 // 88 // The boundary is usually obtained from the "boundary" parameter of 89 // the message's "Content-Type" header. Use mime.ParseMediaType to 90 // parse such headers. 91 func NewReader(r io.Reader, boundary string) *Reader { 92 b := []byte("\r\n--" + boundary + "--") 93 return &Reader{ 94 bufReader: bufio.NewReader(r), 95 nl: b[:2], 96 nlDashBoundary: b[:len(b)-2], 97 dashBoundaryDash: b[2:], 98 dashBoundary: b[2 : len(b)-2], 99 } 100 } 101 102 func newPart(mr *Reader) (*Part, error) { 103 bp := &Part{ 104 Header: make(map[string][]string), 105 mr: mr, 106 buffer: new(bytes.Buffer), 107 } 108 if err := bp.populateHeaders(); err != nil { 109 return nil, err 110 } 111 bp.r = partReader{bp} 112 const cte = "Content-Transfer-Encoding" 113 if bp.Header.Get(cte) == "quoted-printable" { 114 bp.Header.Del(cte) 115 bp.r = quotedprintable.NewReader(bp.r) 116 } 117 return bp, nil 118 } 119 120 func (bp *Part) populateHeaders() error { 121 r := textproto.NewReader(bp.mr.bufReader) 122 header, err := r.ReadMIMEHeader() 123 if err == nil { 124 bp.Header = header 125 } 126 return err 127 } 128 129 // Read reads the body of a part, after its headers and before the 130 // next part (if any) begins. 131 func (p *Part) Read(d []byte) (n int, err error) { 132 return p.r.Read(d) 133 } 134 135 // partReader implements io.Reader by reading raw bytes directly from the 136 // wrapped *Part, without doing any Transfer-Encoding decoding. 137 type partReader struct { 138 p *Part 139 } 140 141 func (pr partReader) Read(d []byte) (n int, err error) { 142 p := pr.p 143 defer func() { 144 p.bytesRead += n 145 }() 146 if p.buffer.Len() >= len(d) { 147 // Internal buffer of unconsumed data is large enough for 148 // the read request. No need to parse more at the moment. 149 return p.buffer.Read(d) 150 } 151 peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor 152 153 // Look for an immediate empty part without a leading \r\n 154 // before the boundary separator. Some MIME code makes empty 155 // parts like this. Most browsers, however, write the \r\n 156 // before the subsequent boundary even for empty parts and 157 // won't hit this path. 158 if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { 159 return 0, io.EOF 160 } 161 unexpectedEOF := err == io.EOF 162 if err != nil && !unexpectedEOF { 163 return 0, fmt.Errorf("multipart: Part Read: %v", err) 164 } 165 if peek == nil { 166 panic("nil peek buf") 167 } 168 169 // Search the peek buffer for "\r\n--boundary". If found, 170 // consume everything up to the boundary. If not, consume only 171 // as much of the peek buffer as cannot hold the boundary 172 // string. 173 nCopy := 0 174 foundBoundary := false 175 if idx := bytes.Index(peek, p.mr.nlDashBoundary); idx != -1 { 176 nCopy = idx 177 foundBoundary = true 178 } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { 179 nCopy = safeCount 180 } else if unexpectedEOF { 181 // If we've run out of peek buffer and the boundary 182 // wasn't found (and can't possibly fit), we must have 183 // hit the end of the file unexpectedly. 184 return 0, io.ErrUnexpectedEOF 185 } 186 if nCopy > 0 { 187 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil { 188 return 0, err 189 } 190 } 191 n, err = p.buffer.Read(d) 192 if err == io.EOF && !foundBoundary { 193 // If the boundary hasn't been reached there's more to 194 // read, so don't pass through an EOF from the buffer 195 err = nil 196 } 197 return 198 } 199 200 func (p *Part) Close() error { 201 io.Copy(ioutil.Discard, p) 202 return nil 203 } 204 205 // Reader is an iterator over parts in a MIME multipart body. 206 // Reader's underlying parser consumes its input as needed. Seeking 207 // isn't supported. 208 type Reader struct { 209 bufReader *bufio.Reader 210 211 currentPart *Part 212 partsRead int 213 214 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 215 nlDashBoundary []byte // nl + "--boundary" 216 dashBoundaryDash []byte // "--boundary--" 217 dashBoundary []byte // "--boundary" 218 } 219 220 // NextPart returns the next part in the multipart or an error. 221 // When there are no more parts, the error io.EOF is returned. 222 func (r *Reader) NextPart() (*Part, error) { 223 if r.currentPart != nil { 224 r.currentPart.Close() 225 } 226 227 expectNewPart := false 228 for { 229 line, err := r.bufReader.ReadSlice('\n') 230 if err == io.EOF && r.isFinalBoundary(line) { 231 // If the buffer ends in "--boundary--" without the 232 // trailing "\r\n", ReadSlice will return an error 233 // (since it's missing the '\n'), but this is a valid 234 // multipart EOF so we need to return io.EOF instead of 235 // a fmt-wrapped one. 236 return nil, io.EOF 237 } 238 if err != nil { 239 return nil, fmt.Errorf("multipart: NextPart: %v", err) 240 } 241 242 if r.isBoundaryDelimiterLine(line) { 243 r.partsRead++ 244 bp, err := newPart(r) 245 if err != nil { 246 return nil, err 247 } 248 r.currentPart = bp 249 return bp, nil 250 } 251 252 if r.isFinalBoundary(line) { 253 // Expected EOF 254 return nil, io.EOF 255 } 256 257 if expectNewPart { 258 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 259 } 260 261 if r.partsRead == 0 { 262 // skip line 263 continue 264 } 265 266 // Consume the "\n" or "\r\n" separator between the 267 // body of the previous part and the boundary line we 268 // now expect will follow. (either a new part or the 269 // end boundary) 270 if bytes.Equal(line, r.nl) { 271 expectNewPart = true 272 continue 273 } 274 275 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 276 } 277 } 278 279 // isFinalBoundary reports whether line is the final boundary line 280 // indicating that all parts are over. 281 // It matches `^--boundary--[ \t]*(\r\n)?$` 282 func (mr *Reader) isFinalBoundary(line []byte) bool { 283 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 284 return false 285 } 286 rest := line[len(mr.dashBoundaryDash):] 287 rest = skipLWSPChar(rest) 288 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 289 } 290 291 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 292 // http://tools.ietf.org/html/rfc2046#section-5.1 293 // The boundary delimiter line is then defined as a line 294 // consisting entirely of two hyphen characters ("-", 295 // decimal value 45) followed by the boundary parameter 296 // value from the Content-Type header field, optional linear 297 // whitespace, and a terminating CRLF. 298 if !bytes.HasPrefix(line, mr.dashBoundary) { 299 return false 300 } 301 rest := line[len(mr.dashBoundary):] 302 rest = skipLWSPChar(rest) 303 304 // On the first part, see our lines are ending in \n instead of \r\n 305 // and switch into that mode if so. This is a violation of the spec, 306 // but occurs in practice. 307 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 308 mr.nl = mr.nl[1:] 309 mr.nlDashBoundary = mr.nlDashBoundary[1:] 310 } 311 return bytes.Equal(rest, mr.nl) 312 } 313 314 // peekBufferIsEmptyPart reports whether the provided peek-ahead 315 // buffer represents an empty part. It is called only if we've not 316 // already read any bytes in this part and checks for the case of MIME 317 // software not writing the \r\n on empty parts. Some does, some 318 // doesn't. 319 // 320 // This checks that what follows the "--boundary" is actually the end 321 // ("--boundary--" with optional whitespace) or optional whitespace 322 // and then a newline, so we don't catch "--boundaryFAKE", in which 323 // case the whole line is part of the data. 324 func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool { 325 // End of parts case. 326 // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)` 327 if bytes.HasPrefix(peek, mr.dashBoundaryDash) { 328 rest := peek[len(mr.dashBoundaryDash):] 329 rest = skipLWSPChar(rest) 330 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0 331 } 332 if !bytes.HasPrefix(peek, mr.dashBoundary) { 333 return false 334 } 335 // Test whether rest matches `^[ \t]*\r\n`) 336 rest := peek[len(mr.dashBoundary):] 337 rest = skipLWSPChar(rest) 338 return bytes.HasPrefix(rest, mr.nl) 339 } 340 341 // skipLWSPChar returns b with leading spaces and tabs removed. 342 // RFC 822 defines: 343 // LWSP-char = SPACE / HTAB 344 func skipLWSPChar(b []byte) []byte { 345 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 346 b = b[1:] 347 } 348 return b 349 }