github.com/c12o16h1/go/src@v0.0.0-20200114212001-5a151c0f00ed/mime/multipart/multipart.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 // 5 6 /* 7 Package multipart implements MIME multipart parsing, as defined in RFC 8 2046. 9 10 The implementation is sufficient for HTTP (RFC 2388) and the multipart 11 bodies generated by popular browsers. 12 */ 13 package multipart 14 15 import ( 16 "bufio" 17 "bytes" 18 "fmt" 19 "io" 20 "io/ioutil" 21 "mime" 22 "mime/quotedprintable" 23 "net/textproto" 24 "strings" 25 ) 26 27 var emptyParams = make(map[string]string) 28 29 // This constant needs to be at least 76 for this package to work correctly. 30 // This is because \r\n--separator_of_len_70- would fill the buffer and it 31 // wouldn't be safe to consume a single byte from it. 32 const peekBufferSize = 4096 33 34 // A Part represents a single part in a multipart body. 35 type Part struct { 36 // The headers of the body, if any, with the keys canonicalized 37 // in the same fashion that the Go http.Request headers are. 38 // For example, "foo-bar" changes case to "Foo-Bar" 39 Header textproto.MIMEHeader 40 41 mr *Reader 42 43 disposition string 44 dispositionParams map[string]string 45 46 // r is either a reader directly reading from mr, or it's a 47 // wrapper around such a reader, decoding the 48 // Content-Transfer-Encoding 49 r io.Reader 50 51 n int // known data bytes waiting in mr.bufReader 52 total int64 // total data bytes read already 53 err error // error to return when n == 0 54 readErr error // read error observed from mr.bufReader 55 } 56 57 // FormName returns the name parameter if p has a Content-Disposition 58 // of type "form-data". Otherwise it returns the empty string. 59 func (p *Part) FormName() string { 60 // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF 61 // of Content-Disposition value format. 62 if p.dispositionParams == nil { 63 p.parseContentDisposition() 64 } 65 if p.disposition != "form-data" { 66 return "" 67 } 68 return p.dispositionParams["name"] 69 } 70 71 // FileName returns the filename parameter of the Part's 72 // Content-Disposition header. 73 func (p *Part) FileName() string { 74 if p.dispositionParams == nil { 75 p.parseContentDisposition() 76 } 77 return p.dispositionParams["filename"] 78 } 79 80 func (p *Part) parseContentDisposition() { 81 v := p.Header.Get("Content-Disposition") 82 var err error 83 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 84 if err != nil { 85 p.dispositionParams = emptyParams 86 } 87 } 88 89 // NewReader creates a new multipart Reader reading from r using the 90 // given MIME boundary. 91 // 92 // The boundary is usually obtained from the "boundary" parameter of 93 // the message's "Content-Type" header. Use mime.ParseMediaType to 94 // parse such headers. 95 func NewReader(r io.Reader, boundary string) *Reader { 96 b := []byte("\r\n--" + boundary + "--") 97 return &Reader{ 98 bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), 99 nl: b[:2], 100 nlDashBoundary: b[:len(b)-2], 101 dashBoundaryDash: b[2:], 102 dashBoundary: b[2 : len(b)-2], 103 } 104 } 105 106 // stickyErrorReader is an io.Reader which never calls Read on its 107 // underlying Reader once an error has been seen. (the io.Reader 108 // interface's contract promises nothing about the return values of 109 // Read calls after an error, yet this package does do multiple Reads 110 // after error) 111 type stickyErrorReader struct { 112 r io.Reader 113 err error 114 } 115 116 func (r *stickyErrorReader) Read(p []byte) (n int, _ error) { 117 if r.err != nil { 118 return 0, r.err 119 } 120 n, r.err = r.r.Read(p) 121 return n, r.err 122 } 123 124 func newPart(mr *Reader, rawPart bool) (*Part, error) { 125 bp := &Part{ 126 Header: make(map[string][]string), 127 mr: mr, 128 } 129 if err := bp.populateHeaders(); err != nil { 130 return nil, err 131 } 132 bp.r = partReader{bp} 133 134 // rawPart is used to switch between Part.NextPart and Part.NextRawPart. 135 if !rawPart { 136 const cte = "Content-Transfer-Encoding" 137 if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") { 138 bp.Header.Del(cte) 139 bp.r = quotedprintable.NewReader(bp.r) 140 } 141 } 142 return bp, nil 143 } 144 145 func (bp *Part) populateHeaders() error { 146 r := textproto.NewReader(bp.mr.bufReader) 147 header, err := r.ReadMIMEHeader() 148 if err == nil { 149 bp.Header = header 150 } 151 return err 152 } 153 154 // Read reads the body of a part, after its headers and before the 155 // next part (if any) begins. 156 func (p *Part) Read(d []byte) (n int, err error) { 157 return p.r.Read(d) 158 } 159 160 // partReader implements io.Reader by reading raw bytes directly from the 161 // wrapped *Part, without doing any Transfer-Encoding decoding. 162 type partReader struct { 163 p *Part 164 } 165 166 func (pr partReader) Read(d []byte) (int, error) { 167 p := pr.p 168 br := p.mr.bufReader 169 170 // Read into buffer until we identify some data to return, 171 // or we find a reason to stop (boundary or read error). 172 for p.n == 0 && p.err == nil { 173 peek, _ := br.Peek(br.Buffered()) 174 p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) 175 if p.n == 0 && p.err == nil { 176 // Force buffered I/O to read more into buffer. 177 _, p.readErr = br.Peek(len(peek) + 1) 178 if p.readErr == io.EOF { 179 p.readErr = io.ErrUnexpectedEOF 180 } 181 } 182 } 183 184 // Read out from "data to return" part of buffer. 185 if p.n == 0 { 186 return 0, p.err 187 } 188 n := len(d) 189 if n > p.n { 190 n = p.n 191 } 192 n, _ = br.Read(d[:n]) 193 p.total += int64(n) 194 p.n -= n 195 if p.n == 0 { 196 return n, p.err 197 } 198 return n, nil 199 } 200 201 // scanUntilBoundary scans buf to identify how much of it can be safely 202 // returned as part of the Part body. 203 // dashBoundary is "--boundary". 204 // nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. 205 // The comments below (and the name) assume "\n--boundary", but either is accepted. 206 // total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. 207 // readErr is the read error, if any, that followed reading the bytes in buf. 208 // scanUntilBoundary returns the number of data bytes from buf that can be 209 // returned as part of the Part body and also the error to return (if any) 210 // once those data bytes are done. 211 func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { 212 if total == 0 { 213 // At beginning of body, allow dashBoundary. 214 if bytes.HasPrefix(buf, dashBoundary) { 215 switch matchAfterPrefix(buf, dashBoundary, readErr) { 216 case -1: 217 return len(dashBoundary), nil 218 case 0: 219 return 0, nil 220 case +1: 221 return 0, io.EOF 222 } 223 } 224 if bytes.HasPrefix(dashBoundary, buf) { 225 return 0, readErr 226 } 227 } 228 229 // Search for "\n--boundary". 230 if i := bytes.Index(buf, nlDashBoundary); i >= 0 { 231 switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { 232 case -1: 233 return i + len(nlDashBoundary), nil 234 case 0: 235 return i, nil 236 case +1: 237 return i, io.EOF 238 } 239 } 240 if bytes.HasPrefix(nlDashBoundary, buf) { 241 return 0, readErr 242 } 243 244 // Otherwise, anything up to the final \n is not part of the boundary 245 // and so must be part of the body. 246 // Also if the section from the final \n onward is not a prefix of the boundary, 247 // it too must be part of the body. 248 i := bytes.LastIndexByte(buf, nlDashBoundary[0]) 249 if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { 250 return i, nil 251 } 252 return len(buf), readErr 253 } 254 255 // matchAfterPrefix checks whether buf should be considered to match the boundary. 256 // The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary", 257 // and the caller has verified already that bytes.HasPrefix(buf, prefix) is true. 258 // 259 // matchAfterPrefix returns +1 if the buffer does match the boundary, 260 // meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input. 261 // It returns -1 if the buffer definitely does NOT match the boundary, 262 // meaning the prefix is followed by some other character. 263 // For example, "--foobar" does not match "--foo". 264 // It returns 0 more input needs to be read to make the decision, 265 // meaning that len(buf) == len(prefix) and readErr == nil. 266 func matchAfterPrefix(buf, prefix []byte, readErr error) int { 267 if len(buf) == len(prefix) { 268 if readErr != nil { 269 return +1 270 } 271 return 0 272 } 273 c := buf[len(prefix)] 274 if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '-' { 275 return +1 276 } 277 return -1 278 } 279 280 func (p *Part) Close() error { 281 io.Copy(ioutil.Discard, p) 282 return nil 283 } 284 285 // Reader is an iterator over parts in a MIME multipart body. 286 // Reader's underlying parser consumes its input as needed. Seeking 287 // isn't supported. 288 type Reader struct { 289 bufReader *bufio.Reader 290 291 currentPart *Part 292 partsRead int 293 294 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 295 nlDashBoundary []byte // nl + "--boundary" 296 dashBoundaryDash []byte // "--boundary--" 297 dashBoundary []byte // "--boundary" 298 } 299 300 // NextPart returns the next part in the multipart or an error. 301 // When there are no more parts, the error io.EOF is returned. 302 // 303 // As a special case, if the "Content-Transfer-Encoding" header 304 // has a value of "quoted-printable", that header is instead 305 // hidden and the body is transparently decoded during Read calls. 306 func (r *Reader) NextPart() (*Part, error) { 307 return r.nextPart(false) 308 } 309 310 // NextRawPart returns the next part in the multipart or an error. 311 // When there are no more parts, the error io.EOF is returned. 312 // 313 // Unlike NextPart, it does not have special handling for 314 // "Content-Transfer-Encoding: quoted-printable". 315 func (r *Reader) NextRawPart() (*Part, error) { 316 return r.nextPart(true) 317 } 318 319 func (r *Reader) nextPart(rawPart bool) (*Part, error) { 320 if r.currentPart != nil { 321 r.currentPart.Close() 322 } 323 if string(r.dashBoundary) == "--" { 324 return nil, fmt.Errorf("multipart: boundary is empty") 325 } 326 expectNewPart := false 327 for { 328 line, err := r.bufReader.ReadSlice('\n') 329 330 if err == io.EOF && r.isFinalBoundary(line) { 331 // If the buffer ends in "--boundary--" without the 332 // trailing "\r\n", ReadSlice will return an error 333 // (since it's missing the '\n'), but this is a valid 334 // multipart EOF so we need to return io.EOF instead of 335 // a fmt-wrapped one. 336 return nil, io.EOF 337 } 338 if err != nil { 339 return nil, fmt.Errorf("multipart: NextPart: %v", err) 340 } 341 342 if r.isBoundaryDelimiterLine(line) { 343 r.partsRead++ 344 bp, err := newPart(r, rawPart) 345 if err != nil { 346 return nil, err 347 } 348 r.currentPart = bp 349 return bp, nil 350 } 351 352 if r.isFinalBoundary(line) { 353 // Expected EOF 354 return nil, io.EOF 355 } 356 357 if expectNewPart { 358 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 359 } 360 361 if r.partsRead == 0 { 362 // skip line 363 continue 364 } 365 366 // Consume the "\n" or "\r\n" separator between the 367 // body of the previous part and the boundary line we 368 // now expect will follow. (either a new part or the 369 // end boundary) 370 if bytes.Equal(line, r.nl) { 371 expectNewPart = true 372 continue 373 } 374 375 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 376 } 377 } 378 379 // isFinalBoundary reports whether line is the final boundary line 380 // indicating that all parts are over. 381 // It matches `^--boundary--[ \t]*(\r\n)?$` 382 func (mr *Reader) isFinalBoundary(line []byte) bool { 383 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 384 return false 385 } 386 rest := line[len(mr.dashBoundaryDash):] 387 rest = skipLWSPChar(rest) 388 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 389 } 390 391 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 392 // https://tools.ietf.org/html/rfc2046#section-5.1 393 // The boundary delimiter line is then defined as a line 394 // consisting entirely of two hyphen characters ("-", 395 // decimal value 45) followed by the boundary parameter 396 // value from the Content-Type header field, optional linear 397 // whitespace, and a terminating CRLF. 398 if !bytes.HasPrefix(line, mr.dashBoundary) { 399 return false 400 } 401 rest := line[len(mr.dashBoundary):] 402 rest = skipLWSPChar(rest) 403 404 // On the first part, see our lines are ending in \n instead of \r\n 405 // and switch into that mode if so. This is a violation of the spec, 406 // but occurs in practice. 407 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 408 mr.nl = mr.nl[1:] 409 mr.nlDashBoundary = mr.nlDashBoundary[1:] 410 } 411 return bytes.Equal(rest, mr.nl) 412 } 413 414 // skipLWSPChar returns b with leading spaces and tabs removed. 415 // RFC 822 defines: 416 // LWSP-char = SPACE / HTAB 417 func skipLWSPChar(b []byte) []byte { 418 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 419 b = b[1:] 420 } 421 return b 422 }