github.com/ltltlt/go-source-code@v0.0.0-20190830023027-95be009773aa/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322 and 9 extended by RFC 6532. 10 Notable divergences: 11 * Obsolete address formats are not parsed, including addresses with 12 embedded route information. 13 * The full range of spacing (the CFWS syntax element) is not supported, 14 such as breaking addresses across lines. 15 * No unicode normalization is performed. 16 * The special characters ()[]:;@\, are allowed to appear unquoted in names. 17 */ 18 package mail 19 20 import ( 21 "bufio" 22 "bytes" 23 "errors" 24 "fmt" 25 "io" 26 "log" 27 "mime" 28 "net/textproto" 29 "strings" 30 "time" 31 "unicode/utf8" 32 ) 33 34 var debug = debugT(false) 35 36 type debugT bool 37 38 func (d debugT) Printf(format string, args ...interface{}) { 39 if d { 40 log.Printf(format, args...) 41 } 42 } 43 44 // A Message represents a parsed mail message. 45 type Message struct { 46 Header Header 47 Body io.Reader 48 } 49 50 // ReadMessage reads a message from r. 51 // The headers are parsed, and the body of the message will be available 52 // for reading from msg.Body. 53 func ReadMessage(r io.Reader) (msg *Message, err error) { 54 tp := textproto.NewReader(bufio.NewReader(r)) 55 56 hdr, err := tp.ReadMIMEHeader() 57 if err != nil { 58 return nil, err 59 } 60 61 return &Message{ 62 Header: Header(hdr), 63 Body: tp.R, 64 }, nil 65 } 66 67 // Layouts suitable for passing to time.Parse. 68 // These are tried in order. 69 var dateLayouts []string 70 71 func init() { 72 // Generate layouts based on RFC 5322, section 3.3. 
73 74 dows := [...]string{"", "Mon, "} // day-of-week 75 days := [...]string{"2", "02"} // day = 1*2DIGIT 76 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 77 seconds := [...]string{":05", ""} // second 78 // "-0700 (MST)" is not in RFC 5322, but is common. 79 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 80 81 for _, dow := range dows { 82 for _, day := range days { 83 for _, year := range years { 84 for _, second := range seconds { 85 for _, zone := range zones { 86 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 87 dateLayouts = append(dateLayouts, s) 88 } 89 } 90 } 91 } 92 } 93 } 94 95 // ParseDate parses an RFC 5322 date string. 96 func ParseDate(date string) (time.Time, error) { 97 for _, layout := range dateLayouts { 98 t, err := time.Parse(layout, date) 99 if err == nil { 100 return t, nil 101 } 102 } 103 return time.Time{}, errors.New("mail: header could not be parsed") 104 } 105 106 // A Header represents the key-value pairs in a mail message header. 107 type Header map[string][]string 108 109 // Get gets the first value associated with the given key. 110 // It is case insensitive; CanonicalMIMEHeaderKey is used 111 // to canonicalize the provided key. 112 // If there are no values associated with the key, Get returns "". 113 // To access multiple values of a key, or to use non-canonical keys, 114 // access the map directly. 115 func (h Header) Get(key string) string { 116 return textproto.MIMEHeader(h).Get(key) 117 } 118 119 var ErrHeaderNotPresent = errors.New("mail: header not in message") 120 121 // Date parses the Date header field. 122 func (h Header) Date() (time.Time, error) { 123 hdr := h.Get("Date") 124 if hdr == "" { 125 return time.Time{}, ErrHeaderNotPresent 126 } 127 return ParseDate(hdr) 128 } 129 130 // AddressList parses the named header field as a list of addresses. 
131 func (h Header) AddressList(key string) ([]*Address, error) { 132 hdr := h.Get(key) 133 if hdr == "" { 134 return nil, ErrHeaderNotPresent 135 } 136 return ParseAddressList(hdr) 137 } 138 139 // Address represents a single mail address. 140 // An address such as "Barry Gibbs <bg@example.com>" is represented 141 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 142 type Address struct { 143 Name string // Proper name; may be empty. 144 Address string // user@domain 145 } 146 147 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 148 func ParseAddress(address string) (*Address, error) { 149 return (&addrParser{s: address}).parseSingleAddress() 150 } 151 152 // ParseAddressList parses the given string as a list of addresses. 153 func ParseAddressList(list string) ([]*Address, error) { 154 return (&addrParser{s: list}).parseAddressList() 155 } 156 157 // An AddressParser is an RFC 5322 address parser. 158 type AddressParser struct { 159 // WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. 160 WordDecoder *mime.WordDecoder 161 } 162 163 // Parse parses a single RFC 5322 address of the 164 // form "Gogh Fir <gf@example.com>" or "foo@example.com". 165 func (p *AddressParser) Parse(address string) (*Address, error) { 166 return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() 167 } 168 169 // ParseList parses the given string as a list of comma-separated addresses 170 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com". 171 func (p *AddressParser) ParseList(list string) ([]*Address, error) { 172 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() 173 } 174 175 // String formats the address as a valid RFC 5322 address. 176 // If the address's name contains non-ASCII characters 177 // the name will be rendered according to RFC 2047. 
178 func (a *Address) String() string { 179 // Format address local@domain 180 at := strings.LastIndex(a.Address, "@") 181 var local, domain string 182 if at < 0 { 183 // This is a malformed address ("@" is required in addr-spec); 184 // treat the whole address as local-part. 185 local = a.Address 186 } else { 187 local, domain = a.Address[:at], a.Address[at+1:] 188 } 189 190 // Add quotes if needed 191 quoteLocal := false 192 for i, r := range local { 193 if isAtext(r, false, false) { 194 continue 195 } 196 if r == '.' { 197 // Dots are okay if they are surrounded by atext. 198 // We only need to check that the previous byte is 199 // not a dot, and this isn't the end of the string. 200 if i > 0 && local[i-1] != '.' && i < len(local)-1 { 201 continue 202 } 203 } 204 quoteLocal = true 205 break 206 } 207 if quoteLocal { 208 local = quoteString(local) 209 210 } 211 212 s := "<" + local + "@" + domain + ">" 213 214 if a.Name == "" { 215 return s 216 } 217 218 // If every character is printable ASCII, quoting is simple. 219 allPrintable := true 220 for _, r := range a.Name { 221 // isWSP here should actually be isFWS, 222 // but we don't support folding yet. 223 if !isVchar(r) && !isWSP(r) || isMultibyte(r) { 224 allPrintable = false 225 break 226 } 227 } 228 if allPrintable { 229 return quoteString(a.Name) + " " + s 230 } 231 232 // Text in an encoded-word in a display-name must not contain certain 233 // characters like quotes or parentheses (see RFC 2047 section 5.3). 234 // When this is the case encode the name using base64 encoding. 
235 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { 236 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s 237 } 238 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s 239 } 240 241 type addrParser struct { 242 s string 243 dec *mime.WordDecoder // may be nil 244 } 245 246 func (p *addrParser) parseAddressList() ([]*Address, error) { 247 var list []*Address 248 for { 249 p.skipSpace() 250 addrs, err := p.parseAddress(true) 251 if err != nil { 252 return nil, err 253 } 254 list = append(list, addrs...) 255 256 if !p.skipCFWS() { 257 return nil, errors.New("mail: misformatted parenthetical comment") 258 } 259 if p.empty() { 260 break 261 } 262 if !p.consume(',') { 263 return nil, errors.New("mail: expected comma") 264 } 265 } 266 return list, nil 267 } 268 269 func (p *addrParser) parseSingleAddress() (*Address, error) { 270 addrs, err := p.parseAddress(true) 271 if err != nil { 272 return nil, err 273 } 274 if !p.skipCFWS() { 275 return nil, errors.New("mail: misformatted parenthetical comment") 276 } 277 if !p.empty() { 278 return nil, fmt.Errorf("mail: expected single address, got %q", p.s) 279 } 280 if len(addrs) == 0 { 281 return nil, errors.New("mail: empty group") 282 } 283 if len(addrs) > 1 { 284 return nil, errors.New("mail: group with multiple addresses") 285 } 286 return addrs[0], nil 287 } 288 289 // parseAddress parses a single RFC 5322 address at the start of p. 290 func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) { 291 debug.Printf("parseAddress: %q", p.s) 292 p.skipSpace() 293 if p.empty() { 294 return nil, errors.New("mail: no address") 295 } 296 297 // address = mailbox / group 298 // mailbox = name-addr / addr-spec 299 // group = display-name ":" [group-list] ";" [CFWS] 300 301 // addr-spec has a more restricted grammar than name-addr, 302 // so try parsing it first, and fallback to name-addr. 303 // TODO(dsymonds): Is this really correct? 
304 spec, err := p.consumeAddrSpec() 305 if err == nil { 306 var displayName string 307 p.skipSpace() 308 if !p.empty() && p.peek() == '(' { 309 displayName, err = p.consumeDisplayNameComment() 310 if err != nil { 311 return nil, err 312 } 313 } 314 315 return []*Address{{ 316 Name: displayName, 317 Address: spec, 318 }}, err 319 } 320 debug.Printf("parseAddress: not an addr-spec: %v", err) 321 debug.Printf("parseAddress: state is now %q", p.s) 322 323 // display-name 324 var displayName string 325 if p.peek() != '<' { 326 displayName, err = p.consumePhrase() 327 if err != nil { 328 return nil, err 329 } 330 } 331 debug.Printf("parseAddress: displayName=%q", displayName) 332 333 p.skipSpace() 334 if handleGroup { 335 if p.consume(':') { 336 return p.consumeGroupList() 337 } 338 } 339 // angle-addr = "<" addr-spec ">" 340 if !p.consume('<') { 341 return nil, errors.New("mail: no angle-addr") 342 } 343 spec, err = p.consumeAddrSpec() 344 if err != nil { 345 return nil, err 346 } 347 if !p.consume('>') { 348 return nil, errors.New("mail: unclosed angle-addr") 349 } 350 debug.Printf("parseAddress: spec=%q", spec) 351 352 return []*Address{{ 353 Name: displayName, 354 Address: spec, 355 }}, nil 356 } 357 358 func (p *addrParser) consumeGroupList() ([]*Address, error) { 359 var group []*Address 360 // handle empty group. 361 p.skipSpace() 362 if p.consume(';') { 363 p.skipCFWS() 364 return group, nil 365 } 366 367 for { 368 p.skipSpace() 369 // embedded groups not allowed. 370 addrs, err := p.parseAddress(false) 371 if err != nil { 372 return nil, err 373 } 374 group = append(group, addrs...) 375 376 if !p.skipCFWS() { 377 return nil, errors.New("mail: misformatted parenthetical comment") 378 } 379 if p.consume(';') { 380 p.skipCFWS() 381 break 382 } 383 if !p.consume(',') { 384 return nil, errors.New("mail: expected comma") 385 } 386 } 387 return group, nil 388 } 389 390 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 
391 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 392 debug.Printf("consumeAddrSpec: %q", p.s) 393 394 orig := *p 395 defer func() { 396 if err != nil { 397 *p = orig 398 } 399 }() 400 401 // local-part = dot-atom / quoted-string 402 var localPart string 403 p.skipSpace() 404 if p.empty() { 405 return "", errors.New("mail: no addr-spec") 406 } 407 if p.peek() == '"' { 408 // quoted-string 409 debug.Printf("consumeAddrSpec: parsing quoted-string") 410 localPart, err = p.consumeQuotedString() 411 if localPart == "" { 412 err = errors.New("mail: empty quoted string in addr-spec") 413 } 414 } else { 415 // dot-atom 416 debug.Printf("consumeAddrSpec: parsing dot-atom") 417 localPart, err = p.consumeAtom(true, false) 418 } 419 if err != nil { 420 debug.Printf("consumeAddrSpec: failed: %v", err) 421 return "", err 422 } 423 424 if !p.consume('@') { 425 return "", errors.New("mail: missing @ in addr-spec") 426 } 427 428 // domain = dot-atom / domain-literal 429 var domain string 430 p.skipSpace() 431 if p.empty() { 432 return "", errors.New("mail: no domain in addr-spec") 433 } 434 // TODO(dsymonds): Handle domain-literal 435 domain, err = p.consumeAtom(true, false) 436 if err != nil { 437 return "", err 438 } 439 440 return localPart + "@" + domain, nil 441 } 442 443 // consumePhrase parses the RFC 5322 phrase at the start of p. 444 func (p *addrParser) consumePhrase() (phrase string, err error) { 445 debug.Printf("consumePhrase: [%s]", p.s) 446 // phrase = 1*word 447 var words []string 448 var isPrevEncoded bool 449 for { 450 // word = atom / quoted-string 451 var word string 452 p.skipSpace() 453 if p.empty() { 454 break 455 } 456 isEncoded := false 457 if p.peek() == '"' { 458 // quoted-string 459 word, err = p.consumeQuotedString() 460 } else { 461 // atom 462 // We actually parse dot-atom here to be more permissive 463 // than what RFC 5322 specifies. 
464 word, err = p.consumeAtom(true, true) 465 if err == nil { 466 word, isEncoded, err = p.decodeRFC2047Word(word) 467 } 468 } 469 470 if err != nil { 471 break 472 } 473 debug.Printf("consumePhrase: consumed %q", word) 474 if isPrevEncoded && isEncoded { 475 words[len(words)-1] += word 476 } else { 477 words = append(words, word) 478 } 479 isPrevEncoded = isEncoded 480 } 481 // Ignore any error if we got at least one word. 482 if err != nil && len(words) == 0 { 483 debug.Printf("consumePhrase: hit err: %v", err) 484 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 485 } 486 phrase = strings.Join(words, " ") 487 return phrase, nil 488 } 489 490 // consumeQuotedString parses the quoted string at the start of p. 491 func (p *addrParser) consumeQuotedString() (qs string, err error) { 492 // Assume first byte is '"'. 493 i := 1 494 qsb := make([]rune, 0, 10) 495 496 escaped := false 497 498 Loop: 499 for { 500 r, size := utf8.DecodeRuneInString(p.s[i:]) 501 502 switch { 503 case size == 0: 504 return "", errors.New("mail: unclosed quoted-string") 505 506 case size == 1 && r == utf8.RuneError: 507 return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s) 508 509 case escaped: 510 // quoted-pair = ("\" (VCHAR / WSP)) 511 512 if !isVchar(r) && !isWSP(r) { 513 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 514 } 515 516 qsb = append(qsb, r) 517 escaped = false 518 519 case isQtext(r) || isWSP(r): 520 // qtext (printable US-ASCII excluding " and \), or 521 // FWS (almost; we're ignoring CRLF) 522 qsb = append(qsb, r) 523 524 case r == '"': 525 break Loop 526 527 case r == '\\': 528 escaped = true 529 530 default: 531 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 532 533 } 534 535 i += size 536 } 537 p.s = p.s[i+1:] 538 return string(qsb), nil 539 } 540 541 // consumeAtom parses an RFC 5322 atom at the start of p. 542 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 
543 // If permissive is true, consumeAtom will not fail on: 544 // - leading/trailing/double dots in the atom (see golang.org/issue/4938) 545 // - special characters (RFC 5322 3.2.3) except '<', '>', ':' and '"' (see golang.org/issue/21018) 546 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { 547 i := 0 548 549 Loop: 550 for { 551 r, size := utf8.DecodeRuneInString(p.s[i:]) 552 switch { 553 case size == 1 && r == utf8.RuneError: 554 return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s) 555 556 case size == 0 || !isAtext(r, dot, permissive): 557 break Loop 558 559 default: 560 i += size 561 562 } 563 } 564 565 if i == 0 { 566 return "", errors.New("mail: invalid string") 567 } 568 atom, p.s = p.s[:i], p.s[i:] 569 if !permissive { 570 if strings.HasPrefix(atom, ".") { 571 return "", errors.New("mail: leading dot in atom") 572 } 573 if strings.Contains(atom, "..") { 574 return "", errors.New("mail: double dot in atom") 575 } 576 if strings.HasSuffix(atom, ".") { 577 return "", errors.New("mail: trailing dot in atom") 578 } 579 } 580 return atom, nil 581 } 582 583 func (p *addrParser) consumeDisplayNameComment() (string, error) { 584 if !p.consume('(') { 585 return "", errors.New("mail: comment does not start with (") 586 } 587 comment, ok := p.consumeComment() 588 if !ok { 589 return "", errors.New("mail: misformatted parenthetical comment") 590 } 591 592 // TODO(stapelberg): parse quoted-string within comment 593 words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' }) 594 for idx, word := range words { 595 decoded, isEncoded, err := p.decodeRFC2047Word(word) 596 if err != nil { 597 return "", err 598 } 599 if isEncoded { 600 words[idx] = decoded 601 } 602 } 603 604 return strings.Join(words, " "), nil 605 } 606 607 func (p *addrParser) consume(c byte) bool { 608 if p.empty() || p.peek() != c { 609 return false 610 } 611 p.s = p.s[1:] 612 return true 613 } 614 615 // skipSpace skips the 
leading space and tab characters. 616 func (p *addrParser) skipSpace() { 617 p.s = strings.TrimLeft(p.s, " \t") 618 } 619 620 func (p *addrParser) peek() byte { 621 return p.s[0] 622 } 623 624 func (p *addrParser) empty() bool { 625 return p.len() == 0 626 } 627 628 func (p *addrParser) len() int { 629 return len(p.s) 630 } 631 632 // skipCFWS skips CFWS as defined in RFC5322. 633 func (p *addrParser) skipCFWS() bool { 634 p.skipSpace() 635 636 for { 637 if !p.consume('(') { 638 break 639 } 640 641 if _, ok := p.consumeComment(); !ok { 642 return false 643 } 644 645 p.skipSpace() 646 } 647 648 return true 649 } 650 651 func (p *addrParser) consumeComment() (string, bool) { 652 // '(' already consumed. 653 depth := 1 654 655 var comment string 656 for { 657 if p.empty() || depth == 0 { 658 break 659 } 660 661 if p.peek() == '\\' && p.len() > 1 { 662 p.s = p.s[1:] 663 } else if p.peek() == '(' { 664 depth++ 665 } else if p.peek() == ')' { 666 depth-- 667 } 668 if depth > 0 { 669 comment += p.s[:1] 670 } 671 p.s = p.s[1:] 672 } 673 674 return comment, depth == 0 675 } 676 677 func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) { 678 if p.dec != nil { 679 word, err = p.dec.Decode(s) 680 } else { 681 word, err = rfc2047Decoder.Decode(s) 682 } 683 684 if err == nil { 685 return word, true, nil 686 } 687 688 if _, ok := err.(charsetError); ok { 689 return s, true, err 690 } 691 692 // Ignore invalid RFC 2047 encoded-word errors. 693 return s, false, nil 694 } 695 696 var rfc2047Decoder = mime.WordDecoder{ 697 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { 698 return nil, charsetError(charset) 699 }, 700 } 701 702 type charsetError string 703 704 func (e charsetError) Error() string { 705 return fmt.Sprintf("charset not supported: %q", string(e)) 706 } 707 708 // isAtext reports whether r is an RFC 5322 atext character. 709 // If dot is true, period is included. 
// If permissive is true, the RFC 5322 3.2.3 specials are included,
// except '<', '>', ':' and '"'.
func isAtext(r rune, dot, permissive bool) bool {
	if r == '.' {
		return dot
	}
	// These four are never atext, even in permissive mode.
	if r == '<' || r == '>' || r == '"' || r == ':' {
		return false
	}
	// The remaining RFC 5322 3.2.3 specials are allowed only when permissive.
	if r == '(' || r == ')' || r == '[' || r == ']' ||
		r == ';' || r == '@' || r == '\\' || r == ',' {
		return permissive
	}
	return isVchar(r)
}

// isQtext reports whether r is an RFC 5322 qtext character:
// any VCHAR other than backslash and double quote.
func isQtext(r rune) bool {
	return r != '\\' && r != '"' && isVchar(r)
}

// quoteString renders a string as an RFC 5322 quoted-string.
// qtext and WSP runes are copied as-is, other VCHAR runes are escaped
// with a backslash, and every remaining rune is left out of the result.
func quoteString(s string) string {
	var out bytes.Buffer
	out.WriteByte('"')
	for _, c := range s {
		switch {
		case isQtext(c), isWSP(c):
			out.WriteRune(c)
		case isVchar(c):
			out.WriteByte('\\')
			out.WriteRune(c)
		}
	}
	out.WriteByte('"')
	return out.String()
}

// isVchar reports whether r is an RFC 5322 VCHAR character:
// a visible ASCII character ('!' through '~') or a multi-byte rune.
func isVchar(r rune) bool {
	if isMultibyte(r) {
		return true
	}
	return r >= '!' && r <= '~'
}

// isMultibyte reports whether r is a multi-byte UTF-8 character
// as supported by RFC 6532.
func isMultibyte(r rune) bool {
	return !(r < utf8.RuneSelf)
}

// isWSP reports whether r is a WSP (white space).
// WSP is a space or horizontal tab (RFC 5234 Appendix B).
func isWSP(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}