github.com/shijuvar/go@v0.0.0-20141209052335-e8f13700b70c/src/mime/multipart/multipart.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 // 5 6 /* 7 Package multipart implements MIME multipart parsing, as defined in RFC 8 2046. 9 10 The implementation is sufficient for HTTP (RFC 2388) and the multipart 11 bodies generated by popular browsers. 12 */ 13 package multipart 14 15 import ( 16 "bufio" 17 "bytes" 18 "fmt" 19 "io" 20 "io/ioutil" 21 "mime" 22 "net/textproto" 23 ) 24 25 var emptyParams = make(map[string]string) 26 27 // A Part represents a single part in a multipart body. 28 type Part struct { 29 // The headers of the body, if any, with the keys canonicalized 30 // in the same fashion that the Go http.Request headers are. 31 // For example, "foo-bar" changes case to "Foo-Bar" 32 // 33 // As a special case, if the "Content-Transfer-Encoding" header 34 // has a value of "quoted-printable", that header is instead 35 // hidden from this map and the body is transparently decoded 36 // during Read calls. 37 Header textproto.MIMEHeader 38 39 buffer *bytes.Buffer 40 mr *Reader 41 bytesRead int 42 43 disposition string 44 dispositionParams map[string]string 45 46 // r is either a reader directly reading from mr, or it's a 47 // wrapper around such a reader, decoding the 48 // Content-Transfer-Encoding 49 r io.Reader 50 } 51 52 // FormName returns the name parameter if p has a Content-Disposition 53 // of type "form-data". Otherwise it returns the empty string. 54 func (p *Part) FormName() string { 55 // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF 56 // of Content-Disposition value format. 57 if p.dispositionParams == nil { 58 p.parseContentDisposition() 59 } 60 if p.disposition != "form-data" { 61 return "" 62 } 63 return p.dispositionParams["name"] 64 } 65 66 // FileName returns the filename parameter of the Part's 67 // Content-Disposition header. 68 func (p *Part) FileName() string { 69 if p.dispositionParams == nil { 70 p.parseContentDisposition() 71 } 72 return p.dispositionParams["filename"] 73 } 74 75 func (p *Part) parseContentDisposition() { 76 v := p.Header.Get("Content-Disposition") 77 var err error 78 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 79 if err != nil { 80 p.dispositionParams = emptyParams 81 } 82 } 83 84 // NewReader creates a new multipart Reader reading from r using the 85 // given MIME boundary. 86 // 87 // The boundary is usually obtained from the "boundary" parameter of 88 // the message's "Content-Type" header. Use mime.ParseMediaType to 89 // parse such headers. 90 func NewReader(r io.Reader, boundary string) *Reader { 91 b := []byte("\r\n--" + boundary + "--") 92 return &Reader{ 93 bufReader: bufio.NewReader(r), 94 nl: b[:2], 95 nlDashBoundary: b[:len(b)-2], 96 dashBoundaryDash: b[2:], 97 dashBoundary: b[2 : len(b)-2], 98 } 99 } 100 101 func newPart(mr *Reader) (*Part, error) { 102 bp := &Part{ 103 Header: make(map[string][]string), 104 mr: mr, 105 buffer: new(bytes.Buffer), 106 } 107 if err := bp.populateHeaders(); err != nil { 108 return nil, err 109 } 110 bp.r = partReader{bp} 111 const cte = "Content-Transfer-Encoding" 112 if bp.Header.Get(cte) == "quoted-printable" { 113 bp.Header.Del(cte) 114 bp.r = newQuotedPrintableReader(bp.r) 115 } 116 return bp, nil 117 } 118 119 func (bp *Part) populateHeaders() error { 120 r := textproto.NewReader(bp.mr.bufReader) 121 header, err := r.ReadMIMEHeader() 122 if err == nil { 123 bp.Header = header 124 } 125 return err 126 } 127 128 // Read reads the body of a part, after its headers and before the 129 // next part (if any) begins. 130 func (p *Part) Read(d []byte) (n int, err error) { 131 return p.r.Read(d) 132 } 133 134 // partReader implements io.Reader by reading raw bytes directly from the 135 // wrapped *Part, without doing any Transfer-Encoding decoding. 136 type partReader struct { 137 p *Part 138 } 139 140 func (pr partReader) Read(d []byte) (n int, err error) { 141 p := pr.p 142 defer func() { 143 p.bytesRead += n 144 }() 145 if p.buffer.Len() >= len(d) { 146 // Internal buffer of unconsumed data is large enough for 147 // the read request. No need to parse more at the moment. 148 return p.buffer.Read(d) 149 } 150 peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor 151 152 // Look for an immediate empty part without a leading \r\n 153 // before the boundary separator. Some MIME code makes empty 154 // parts like this. Most browsers, however, write the \r\n 155 // before the subsequent boundary even for empty parts and 156 // won't hit this path. 157 if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { 158 return 0, io.EOF 159 } 160 unexpectedEOF := err == io.EOF 161 if err != nil && !unexpectedEOF { 162 return 0, fmt.Errorf("multipart: Part Read: %v", err) 163 } 164 if peek == nil { 165 panic("nil peek buf") 166 } 167 168 // Search the peek buffer for "\r\n--boundary". If found, 169 // consume everything up to the boundary. If not, consume only 170 // as much of the peek buffer as cannot hold the boundary 171 // string. 172 nCopy := 0 173 foundBoundary := false 174 if idx := bytes.Index(peek, p.mr.nlDashBoundary); idx != -1 { 175 nCopy = idx 176 foundBoundary = true 177 } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { 178 nCopy = safeCount 179 } else if unexpectedEOF { 180 // If we've run out of peek buffer and the boundary 181 // wasn't found (and can't possibly fit), we must have 182 // hit the end of the file unexpectedly. 183 return 0, io.ErrUnexpectedEOF 184 } 185 if nCopy > 0 { 186 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil { 187 return 0, err 188 } 189 } 190 n, err = p.buffer.Read(d) 191 if err == io.EOF && !foundBoundary { 192 // If the boundary hasn't been reached there's more to 193 // read, so don't pass through an EOF from the buffer 194 err = nil 195 } 196 return 197 } 198 199 func (p *Part) Close() error { 200 io.Copy(ioutil.Discard, p) 201 return nil 202 } 203 204 // Reader is an iterator over parts in a MIME multipart body. 205 // Reader's underlying parser consumes its input as needed. Seeking 206 // isn't supported. 207 type Reader struct { 208 bufReader *bufio.Reader 209 210 currentPart *Part 211 partsRead int 212 213 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 214 nlDashBoundary []byte // nl + "--boundary" 215 dashBoundaryDash []byte // "--boundary--" 216 dashBoundary []byte // "--boundary" 217 } 218 219 // NextPart returns the next part in the multipart or an error. 220 // When there are no more parts, the error io.EOF is returned. 221 func (r *Reader) NextPart() (*Part, error) { 222 if r.currentPart != nil { 223 r.currentPart.Close() 224 } 225 226 expectNewPart := false 227 for { 228 line, err := r.bufReader.ReadSlice('\n') 229 if err == io.EOF && r.isFinalBoundary(line) { 230 // If the buffer ends in "--boundary--" without the 231 // trailing "\r\n", ReadSlice will return an error 232 // (since it's missing the '\n'), but this is a valid 233 // multipart EOF so we need to return io.EOF instead of 234 // a fmt-wrapped one. 235 return nil, io.EOF 236 } 237 if err != nil { 238 return nil, fmt.Errorf("multipart: NextPart: %v", err) 239 } 240 241 if r.isBoundaryDelimiterLine(line) { 242 r.partsRead++ 243 bp, err := newPart(r) 244 if err != nil { 245 return nil, err 246 } 247 r.currentPart = bp 248 return bp, nil 249 } 250 251 if r.isFinalBoundary(line) { 252 // Expected EOF 253 return nil, io.EOF 254 } 255 256 if expectNewPart { 257 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 258 } 259 260 if r.partsRead == 0 { 261 // skip line 262 continue 263 } 264 265 // Consume the "\n" or "\r\n" separator between the 266 // body of the previous part and the boundary line we 267 // now expect will follow. (either a new part or the 268 // end boundary) 269 if bytes.Equal(line, r.nl) { 270 expectNewPart = true 271 continue 272 } 273 274 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 275 } 276 } 277 278 // isFinalBoundary reports whether line is the final boundary line 279 // indicating that all parts are over. 280 // It matches `^--boundary--[ \t]*(\r\n)?$` 281 func (mr *Reader) isFinalBoundary(line []byte) bool { 282 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 283 return false 284 } 285 rest := line[len(mr.dashBoundaryDash):] 286 rest = skipLWSPChar(rest) 287 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 288 } 289 290 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 291 // http://tools.ietf.org/html/rfc2046#section-5.1 292 // The boundary delimiter line is then defined as a line 293 // consisting entirely of two hyphen characters ("-", 294 // decimal value 45) followed by the boundary parameter 295 // value from the Content-Type header field, optional linear 296 // whitespace, and a terminating CRLF. 297 if !bytes.HasPrefix(line, mr.dashBoundary) { 298 return false 299 } 300 rest := line[len(mr.dashBoundary):] 301 rest = skipLWSPChar(rest) 302 303 // On the first part, see our lines are ending in \n instead of \r\n 304 // and switch into that mode if so. This is a violation of the spec, 305 // but occurs in practice. 306 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 307 mr.nl = mr.nl[1:] 308 mr.nlDashBoundary = mr.nlDashBoundary[1:] 309 } 310 return bytes.Equal(rest, mr.nl) 311 } 312 313 // peekBufferIsEmptyPart reports whether the provided peek-ahead 314 // buffer represents an empty part. It is called only if we've not 315 // already read any bytes in this part and checks for the case of MIME 316 // software not writing the \r\n on empty parts. Some does, some 317 // doesn't. 318 // 319 // This checks that what follows the "--boundary" is actually the end 320 // ("--boundary--" with optional whitespace) or optional whitespace 321 // and then a newline, so we don't catch "--boundaryFAKE", in which 322 // case the whole line is part of the data. 323 func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool { 324 // End of parts case. 325 // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)` 326 if bytes.HasPrefix(peek, mr.dashBoundaryDash) { 327 rest := peek[len(mr.dashBoundaryDash):] 328 rest = skipLWSPChar(rest) 329 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0 330 } 331 if !bytes.HasPrefix(peek, mr.dashBoundary) { 332 return false 333 } 334 // Test whether rest matches `^[ \t]*\r\n`) 335 rest := peek[len(mr.dashBoundary):] 336 rest = skipLWSPChar(rest) 337 return bytes.HasPrefix(rest, mr.nl) 338 } 339 340 // skipLWSPChar returns b with leading spaces and tabs removed. 341 // RFC 822 defines: 342 // LWSP-char = SPACE / HTAB 343 func skipLWSPChar(b []byte) []byte { 344 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 345 b = b[1:] 346 } 347 return b 348 }