github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322 and 9 extended by RFC 6532. 10 Notable divergences: 11 * Obsolete address formats are not parsed, including addresses with 12 embedded route information. 13 * The full range of spacing (the CFWS syntax element) is not supported, 14 such as breaking addresses across lines. 15 * No unicode normalization is performed. 16 * The special characters ()[]:;@\, are allowed to appear unquoted in names. 17 */ 18 package mail 19 20 import ( 21 "bufio" 22 "errors" 23 "fmt" 24 "io" 25 "log" 26 "mime" 27 "net/textproto" 28 "strings" 29 "sync" 30 "time" 31 "unicode/utf8" 32 ) 33 34 var debug = debugT(false) 35 36 type debugT bool 37 38 func (d debugT) Printf(format string, args ...interface{}) { 39 if d { 40 log.Printf(format, args...) 41 } 42 } 43 44 // A Message represents a parsed mail message. 45 type Message struct { 46 Header Header 47 Body io.Reader 48 } 49 50 // ReadMessage reads a message from r. 51 // The headers are parsed, and the body of the message will be available 52 // for reading from msg.Body. 53 func ReadMessage(r io.Reader) (msg *Message, err error) { 54 tp := textproto.NewReader(bufio.NewReader(r)) 55 56 hdr, err := tp.ReadMIMEHeader() 57 if err != nil { 58 return nil, err 59 } 60 61 return &Message{ 62 Header: Header(hdr), 63 Body: tp.R, 64 }, nil 65 } 66 67 // Layouts suitable for passing to time.Parse. 68 // These are tried in order. 69 var ( 70 dateLayoutsBuildOnce sync.Once 71 dateLayouts []string 72 ) 73 74 func buildDateLayouts() { 75 // Generate layouts based on RFC 5322, section 3.3. 
76 77 dows := [...]string{"", "Mon, "} // day-of-week 78 days := [...]string{"2", "02"} // day = 1*2DIGIT 79 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 80 seconds := [...]string{":05", ""} // second 81 // "-0700 (MST)" is not in RFC 5322, but is common. 82 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 83 84 for _, dow := range dows { 85 for _, day := range days { 86 for _, year := range years { 87 for _, second := range seconds { 88 for _, zone := range zones { 89 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 90 dateLayouts = append(dateLayouts, s) 91 } 92 } 93 } 94 } 95 } 96 } 97 98 // ParseDate parses an RFC 5322 date string. 99 func ParseDate(date string) (time.Time, error) { 100 dateLayoutsBuildOnce.Do(buildDateLayouts) 101 for _, layout := range dateLayouts { 102 t, err := time.Parse(layout, date) 103 if err == nil { 104 return t, nil 105 } 106 } 107 return time.Time{}, errors.New("mail: header could not be parsed") 108 } 109 110 // A Header represents the key-value pairs in a mail message header. 111 type Header map[string][]string 112 113 // Get gets the first value associated with the given key. 114 // It is case insensitive; CanonicalMIMEHeaderKey is used 115 // to canonicalize the provided key. 116 // If there are no values associated with the key, Get returns "". 117 // To access multiple values of a key, or to use non-canonical keys, 118 // access the map directly. 119 func (h Header) Get(key string) string { 120 return textproto.MIMEHeader(h).Get(key) 121 } 122 123 var ErrHeaderNotPresent = errors.New("mail: header not in message") 124 125 // Date parses the Date header field. 126 func (h Header) Date() (time.Time, error) { 127 hdr := h.Get("Date") 128 if hdr == "" { 129 return time.Time{}, ErrHeaderNotPresent 130 } 131 return ParseDate(hdr) 132 } 133 134 // AddressList parses the named header field as a list of addresses. 
135 func (h Header) AddressList(key string) ([]*Address, error) { 136 hdr := h.Get(key) 137 if hdr == "" { 138 return nil, ErrHeaderNotPresent 139 } 140 return ParseAddressList(hdr) 141 } 142 143 // Address represents a single mail address. 144 // An address such as "Barry Gibbs <bg@example.com>" is represented 145 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 146 type Address struct { 147 Name string // Proper name; may be empty. 148 Address string // user@domain 149 } 150 151 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 152 func ParseAddress(address string) (*Address, error) { 153 return (&addrParser{s: address}).parseSingleAddress() 154 } 155 156 // ParseAddressList parses the given string as a list of addresses. 157 func ParseAddressList(list string) ([]*Address, error) { 158 return (&addrParser{s: list}).parseAddressList() 159 } 160 161 // An AddressParser is an RFC 5322 address parser. 162 type AddressParser struct { 163 // WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. 164 WordDecoder *mime.WordDecoder 165 } 166 167 // Parse parses a single RFC 5322 address of the 168 // form "Gogh Fir <gf@example.com>" or "foo@example.com". 169 func (p *AddressParser) Parse(address string) (*Address, error) { 170 return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() 171 } 172 173 // ParseList parses the given string as a list of comma-separated addresses 174 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com". 175 func (p *AddressParser) ParseList(list string) ([]*Address, error) { 176 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() 177 } 178 179 // String formats the address as a valid RFC 5322 address. 180 // If the address's name contains non-ASCII characters 181 // the name will be rendered according to RFC 2047. 
182 func (a *Address) String() string { 183 // Format address local@domain 184 at := strings.LastIndex(a.Address, "@") 185 var local, domain string 186 if at < 0 { 187 // This is a malformed address ("@" is required in addr-spec); 188 // treat the whole address as local-part. 189 local = a.Address 190 } else { 191 local, domain = a.Address[:at], a.Address[at+1:] 192 } 193 194 // Add quotes if needed 195 quoteLocal := false 196 for i, r := range local { 197 if isAtext(r, false, false) { 198 continue 199 } 200 if r == '.' { 201 // Dots are okay if they are surrounded by atext. 202 // We only need to check that the previous byte is 203 // not a dot, and this isn't the end of the string. 204 if i > 0 && local[i-1] != '.' && i < len(local)-1 { 205 continue 206 } 207 } 208 quoteLocal = true 209 break 210 } 211 if quoteLocal { 212 local = quoteString(local) 213 214 } 215 216 s := "<" + local + "@" + domain + ">" 217 218 if a.Name == "" { 219 return s 220 } 221 222 // If every character is printable ASCII, quoting is simple. 223 allPrintable := true 224 for _, r := range a.Name { 225 // isWSP here should actually be isFWS, 226 // but we don't support folding yet. 227 if !isVchar(r) && !isWSP(r) || isMultibyte(r) { 228 allPrintable = false 229 break 230 } 231 } 232 if allPrintable { 233 return quoteString(a.Name) + " " + s 234 } 235 236 // Text in an encoded-word in a display-name must not contain certain 237 // characters like quotes or parentheses (see RFC 2047 section 5.3). 238 // When this is the case encode the name using base64 encoding. 
239 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { 240 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s 241 } 242 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s 243 } 244 245 type addrParser struct { 246 s string 247 dec *mime.WordDecoder // may be nil 248 } 249 250 func (p *addrParser) parseAddressList() ([]*Address, error) { 251 var list []*Address 252 for { 253 p.skipSpace() 254 addrs, err := p.parseAddress(true) 255 if err != nil { 256 return nil, err 257 } 258 list = append(list, addrs...) 259 260 if !p.skipCFWS() { 261 return nil, errors.New("mail: misformatted parenthetical comment") 262 } 263 if p.empty() { 264 break 265 } 266 if !p.consume(',') { 267 return nil, errors.New("mail: expected comma") 268 } 269 } 270 return list, nil 271 } 272 273 func (p *addrParser) parseSingleAddress() (*Address, error) { 274 addrs, err := p.parseAddress(true) 275 if err != nil { 276 return nil, err 277 } 278 if !p.skipCFWS() { 279 return nil, errors.New("mail: misformatted parenthetical comment") 280 } 281 if !p.empty() { 282 return nil, fmt.Errorf("mail: expected single address, got %q", p.s) 283 } 284 if len(addrs) == 0 { 285 return nil, errors.New("mail: empty group") 286 } 287 if len(addrs) > 1 { 288 return nil, errors.New("mail: group with multiple addresses") 289 } 290 return addrs[0], nil 291 } 292 293 // parseAddress parses a single RFC 5322 address at the start of p. 294 func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) { 295 debug.Printf("parseAddress: %q", p.s) 296 p.skipSpace() 297 if p.empty() { 298 return nil, errors.New("mail: no address") 299 } 300 301 // address = mailbox / group 302 // mailbox = name-addr / addr-spec 303 // group = display-name ":" [group-list] ";" [CFWS] 304 305 // addr-spec has a more restricted grammar than name-addr, 306 // so try parsing it first, and fallback to name-addr. 307 // TODO(dsymonds): Is this really correct? 
308 spec, err := p.consumeAddrSpec() 309 if err == nil { 310 var displayName string 311 p.skipSpace() 312 if !p.empty() && p.peek() == '(' { 313 displayName, err = p.consumeDisplayNameComment() 314 if err != nil { 315 return nil, err 316 } 317 } 318 319 return []*Address{{ 320 Name: displayName, 321 Address: spec, 322 }}, err 323 } 324 debug.Printf("parseAddress: not an addr-spec: %v", err) 325 debug.Printf("parseAddress: state is now %q", p.s) 326 327 // display-name 328 var displayName string 329 if p.peek() != '<' { 330 displayName, err = p.consumePhrase() 331 if err != nil { 332 return nil, err 333 } 334 } 335 debug.Printf("parseAddress: displayName=%q", displayName) 336 337 p.skipSpace() 338 if handleGroup { 339 if p.consume(':') { 340 return p.consumeGroupList() 341 } 342 } 343 // angle-addr = "<" addr-spec ">" 344 if !p.consume('<') { 345 return nil, errors.New("mail: no angle-addr") 346 } 347 spec, err = p.consumeAddrSpec() 348 if err != nil { 349 return nil, err 350 } 351 if !p.consume('>') { 352 return nil, errors.New("mail: unclosed angle-addr") 353 } 354 debug.Printf("parseAddress: spec=%q", spec) 355 356 return []*Address{{ 357 Name: displayName, 358 Address: spec, 359 }}, nil 360 } 361 362 func (p *addrParser) consumeGroupList() ([]*Address, error) { 363 var group []*Address 364 // handle empty group. 365 p.skipSpace() 366 if p.consume(';') { 367 p.skipCFWS() 368 return group, nil 369 } 370 371 for { 372 p.skipSpace() 373 // embedded groups not allowed. 374 addrs, err := p.parseAddress(false) 375 if err != nil { 376 return nil, err 377 } 378 group = append(group, addrs...) 379 380 if !p.skipCFWS() { 381 return nil, errors.New("mail: misformatted parenthetical comment") 382 } 383 if p.consume(';') { 384 p.skipCFWS() 385 break 386 } 387 if !p.consume(',') { 388 return nil, errors.New("mail: expected comma") 389 } 390 } 391 return group, nil 392 } 393 394 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 
395 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 396 debug.Printf("consumeAddrSpec: %q", p.s) 397 398 orig := *p 399 defer func() { 400 if err != nil { 401 *p = orig 402 } 403 }() 404 405 // local-part = dot-atom / quoted-string 406 var localPart string 407 p.skipSpace() 408 if p.empty() { 409 return "", errors.New("mail: no addr-spec") 410 } 411 if p.peek() == '"' { 412 // quoted-string 413 debug.Printf("consumeAddrSpec: parsing quoted-string") 414 localPart, err = p.consumeQuotedString() 415 if localPart == "" { 416 err = errors.New("mail: empty quoted string in addr-spec") 417 } 418 } else { 419 // dot-atom 420 debug.Printf("consumeAddrSpec: parsing dot-atom") 421 localPart, err = p.consumeAtom(true, false) 422 } 423 if err != nil { 424 debug.Printf("consumeAddrSpec: failed: %v", err) 425 return "", err 426 } 427 428 if !p.consume('@') { 429 return "", errors.New("mail: missing @ in addr-spec") 430 } 431 432 // domain = dot-atom / domain-literal 433 var domain string 434 p.skipSpace() 435 if p.empty() { 436 return "", errors.New("mail: no domain in addr-spec") 437 } 438 // TODO(dsymonds): Handle domain-literal 439 domain, err = p.consumeAtom(true, false) 440 if err != nil { 441 return "", err 442 } 443 444 return localPart + "@" + domain, nil 445 } 446 447 // consumePhrase parses the RFC 5322 phrase at the start of p. 448 func (p *addrParser) consumePhrase() (phrase string, err error) { 449 debug.Printf("consumePhrase: [%s]", p.s) 450 // phrase = 1*word 451 var words []string 452 var isPrevEncoded bool 453 for { 454 // word = atom / quoted-string 455 var word string 456 p.skipSpace() 457 if p.empty() { 458 break 459 } 460 isEncoded := false 461 if p.peek() == '"' { 462 // quoted-string 463 word, err = p.consumeQuotedString() 464 } else { 465 // atom 466 // We actually parse dot-atom here to be more permissive 467 // than what RFC 5322 specifies. 
468 word, err = p.consumeAtom(true, true) 469 if err == nil { 470 word, isEncoded, err = p.decodeRFC2047Word(word) 471 } 472 } 473 474 if err != nil { 475 break 476 } 477 debug.Printf("consumePhrase: consumed %q", word) 478 if isPrevEncoded && isEncoded { 479 words[len(words)-1] += word 480 } else { 481 words = append(words, word) 482 } 483 isPrevEncoded = isEncoded 484 } 485 // Ignore any error if we got at least one word. 486 if err != nil && len(words) == 0 { 487 debug.Printf("consumePhrase: hit err: %v", err) 488 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 489 } 490 phrase = strings.Join(words, " ") 491 return phrase, nil 492 } 493 494 // consumeQuotedString parses the quoted string at the start of p. 495 func (p *addrParser) consumeQuotedString() (qs string, err error) { 496 // Assume first byte is '"'. 497 i := 1 498 qsb := make([]rune, 0, 10) 499 500 escaped := false 501 502 Loop: 503 for { 504 r, size := utf8.DecodeRuneInString(p.s[i:]) 505 506 switch { 507 case size == 0: 508 return "", errors.New("mail: unclosed quoted-string") 509 510 case size == 1 && r == utf8.RuneError: 511 return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s) 512 513 case escaped: 514 // quoted-pair = ("\" (VCHAR / WSP)) 515 516 if !isVchar(r) && !isWSP(r) { 517 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 518 } 519 520 qsb = append(qsb, r) 521 escaped = false 522 523 case isQtext(r) || isWSP(r): 524 // qtext (printable US-ASCII excluding " and \), or 525 // FWS (almost; we're ignoring CRLF) 526 qsb = append(qsb, r) 527 528 case r == '"': 529 break Loop 530 531 case r == '\\': 532 escaped = true 533 534 default: 535 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 536 537 } 538 539 i += size 540 } 541 p.s = p.s[i+1:] 542 return string(qsb), nil 543 } 544 545 // consumeAtom parses an RFC 5322 atom at the start of p. 546 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 
547 // If permissive is true, consumeAtom will not fail on: 548 // - leading/trailing/double dots in the atom (see golang.org/issue/4938) 549 // - special characters (RFC 5322 3.2.3) except '<', '>', ':' and '"' (see golang.org/issue/21018) 550 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { 551 i := 0 552 553 Loop: 554 for { 555 r, size := utf8.DecodeRuneInString(p.s[i:]) 556 switch { 557 case size == 1 && r == utf8.RuneError: 558 return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s) 559 560 case size == 0 || !isAtext(r, dot, permissive): 561 break Loop 562 563 default: 564 i += size 565 566 } 567 } 568 569 if i == 0 { 570 return "", errors.New("mail: invalid string") 571 } 572 atom, p.s = p.s[:i], p.s[i:] 573 if !permissive { 574 if strings.HasPrefix(atom, ".") { 575 return "", errors.New("mail: leading dot in atom") 576 } 577 if strings.Contains(atom, "..") { 578 return "", errors.New("mail: double dot in atom") 579 } 580 if strings.HasSuffix(atom, ".") { 581 return "", errors.New("mail: trailing dot in atom") 582 } 583 } 584 return atom, nil 585 } 586 587 func (p *addrParser) consumeDisplayNameComment() (string, error) { 588 if !p.consume('(') { 589 return "", errors.New("mail: comment does not start with (") 590 } 591 comment, ok := p.consumeComment() 592 if !ok { 593 return "", errors.New("mail: misformatted parenthetical comment") 594 } 595 596 // TODO(stapelberg): parse quoted-string within comment 597 words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' }) 598 for idx, word := range words { 599 decoded, isEncoded, err := p.decodeRFC2047Word(word) 600 if err != nil { 601 return "", err 602 } 603 if isEncoded { 604 words[idx] = decoded 605 } 606 } 607 608 return strings.Join(words, " "), nil 609 } 610 611 func (p *addrParser) consume(c byte) bool { 612 if p.empty() || p.peek() != c { 613 return false 614 } 615 p.s = p.s[1:] 616 return true 617 } 618 619 // skipSpace skips the 
leading space and tab characters. 620 func (p *addrParser) skipSpace() { 621 p.s = strings.TrimLeft(p.s, " \t") 622 } 623 624 func (p *addrParser) peek() byte { 625 return p.s[0] 626 } 627 628 func (p *addrParser) empty() bool { 629 return p.len() == 0 630 } 631 632 func (p *addrParser) len() int { 633 return len(p.s) 634 } 635 636 // skipCFWS skips CFWS as defined in RFC5322. 637 func (p *addrParser) skipCFWS() bool { 638 p.skipSpace() 639 640 for { 641 if !p.consume('(') { 642 break 643 } 644 645 if _, ok := p.consumeComment(); !ok { 646 return false 647 } 648 649 p.skipSpace() 650 } 651 652 return true 653 } 654 655 func (p *addrParser) consumeComment() (string, bool) { 656 // '(' already consumed. 657 depth := 1 658 659 var comment string 660 for { 661 if p.empty() || depth == 0 { 662 break 663 } 664 665 if p.peek() == '\\' && p.len() > 1 { 666 p.s = p.s[1:] 667 } else if p.peek() == '(' { 668 depth++ 669 } else if p.peek() == ')' { 670 depth-- 671 } 672 if depth > 0 { 673 comment += p.s[:1] 674 } 675 p.s = p.s[1:] 676 } 677 678 return comment, depth == 0 679 } 680 681 func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) { 682 if p.dec != nil { 683 word, err = p.dec.Decode(s) 684 } else { 685 word, err = rfc2047Decoder.Decode(s) 686 } 687 688 if err == nil { 689 return word, true, nil 690 } 691 692 if _, ok := err.(charsetError); ok { 693 return s, true, err 694 } 695 696 // Ignore invalid RFC 2047 encoded-word errors. 697 return s, false, nil 698 } 699 700 var rfc2047Decoder = mime.WordDecoder{ 701 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { 702 return nil, charsetError(charset) 703 }, 704 } 705 706 type charsetError string 707 708 func (e charsetError) Error() string { 709 return fmt.Sprintf("charset not supported: %q", string(e)) 710 } 711 712 // isAtext reports whether r is an RFC 5322 atext character. 713 // If dot is true, period is included. 
// If permissive is true, RFC 5322 3.2.3 specials are included,
// except '<', '>', ':' and '"'.
func isAtext(r rune, dot, permissive bool) bool {
	switch r {
	case '.':
		return dot

	// RFC 5322 3.2.3. specials
	case '(', ')', '[', ']', ';', '@', '\\', ',':
		return permissive

	case '<', '>', '"', ':':
		return false
	}
	return isVchar(r)
}

// isQtext reports whether r is an RFC 5322 qtext character.
func isQtext(r rune) bool {
	// Printable US-ASCII, excluding backslash or quote.
	if r == '\\' || r == '"' {
		return false
	}
	return isVchar(r)
}

// quoteString renders a string as an RFC 5322 quoted-string.
// Backslash and double quote are escaped with a backslash; runes that are
// neither visible characters nor space/tab are omitted from the output.
func quoteString(s string) string {
	var buf strings.Builder
	// Room for the content plus the two surrounding quotes; escapes may
	// still cause growth.
	buf.Grow(len(s) + 2)
	buf.WriteByte('"')
	for _, r := range s {
		if isQtext(r) || isWSP(r) {
			buf.WriteRune(r)
		} else if isVchar(r) {
			buf.WriteByte('\\')
			buf.WriteRune(r)
		}
	}
	buf.WriteByte('"')
	return buf.String()
}

// isVchar reports whether r is an RFC 5322 VCHAR character.
func isVchar(r rune) bool {
	// Visible (printing) characters.
	return '!' <= r && r <= '~' || isMultibyte(r)
}

// isMultibyte reports whether r is a multi-byte UTF-8 character
// as supported by RFC 6532
func isMultibyte(r rune) bool {
	return r >= utf8.RuneSelf
}

// isWSP reports whether r is a WSP (white space).
// WSP is a space or horizontal tab (RFC 5234 Appendix B).
func isWSP(r rune) bool {
	return r == ' ' || r == '\t'
}