// Source: github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/net/mail/message.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Package mail implements parsing of mail messages.

For the most part, this package follows the syntax as specified by RFC 5322 and
extended by RFC 6532.
Notable divergences:
	* Obsolete address formats are not parsed, including addresses with
	  embedded route information.
	* Group addresses are not parsed.
	* The full range of spacing (the CFWS syntax element) is not supported,
	  such as breaking addresses across lines.
	* No unicode normalization is performed.
	* Addresses with some RFC 5322 3.2.3 specials without quotes are parsed.
*/
package mail

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"log"
	"mime"
	"net/textproto"
	"strings"
	"time"
	"unicode/utf8"
)

// debug gates the parser's diagnostic logging. It is false here, so every
// debug.Printf call in this file is a no-op unless this value is changed.
var debug = debugT(false)

// debugT is a boolean switch with a Printf method, so call sites can log
// unconditionally and let the value decide whether anything is written.
type debugT bool

// Printf forwards to log.Printf when d is true and does nothing otherwise.
func (d debugT) Printf(format string, args ...interface{}) {
	if d {
		log.Printf(format, args...)
	}
}

// A Message represents a parsed mail message.
type Message struct {
	Header Header
	Body   io.Reader
}

// ReadMessage reads a message from r.
// The headers are parsed, and the body of the message will be available
// for reading from msg.Body.
//
// r is wrapped in a bufio.Reader; msg.Body is that buffered reader, so the
// body must be read through msg.Body rather than through r directly.
// If the header cannot be read, the error from ReadMIMEHeader is returned
// and msg is nil.
func ReadMessage(r io.Reader) (msg *Message, err error) {
	tp := textproto.NewReader(bufio.NewReader(r))

	hdr, err := tp.ReadMIMEHeader()
	if err != nil {
		return nil, err
	}

	return &Message{
		Header: Header(hdr),
		Body:   tp.R,
	}, nil
}

// Layouts suitable for passing to time.Parse.
// These are tried in order.
// The slice is populated once by init below.
var dateLayouts []string

// init fills dateLayouts with every combination of the optional and
// alternative date components: 2 day-of-week forms x 2 day forms x
// 2 year forms x 2 seconds forms x 3 zone forms = 48 layouts.
func init() {
	// Generate layouts based on RFC 5322, section 3.3.

	dows := [...]string{"", "Mon, "}   // day-of-week
	days := [...]string{"2", "02"}     // day = 1*2DIGIT
	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
	seconds := [...]string{":05", ""}  // second
	// "-0700 (MST)" is not in RFC 5322, but is common.
	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...

	// The nesting order fixes the order of dateLayouts, and therefore the
	// order in which ParseDate tries them: day-of-week varies slowest,
	// zone varies fastest.
	for _, dow := range dows {
		for _, day := range days {
			for _, year := range years {
				for _, second := range seconds {
					for _, zone := range zones {
						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
						dateLayouts = append(dateLayouts, s)
					}
				}
			}
		}
	}
}

// ParseDate parses an RFC 5322 date string.
// Each layout in dateLayouts is tried in order and the first successful
// parse is returned. If no layout matches, the zero time.Time and a
// generic error are returned; the individual time.Parse errors are dropped.
func ParseDate(date string) (time.Time, error) {
	for _, layout := range dateLayouts {
		t, err := time.Parse(layout, date)
		if err == nil {
			return t, nil
		}
	}
	return time.Time{}, errors.New("mail: header could not be parsed")
}

// A Header represents the key-value pairs in a mail message header.
type Header map[string][]string

// Get gets the first value associated with the given key.
// It is case insensitive; CanonicalMIMEHeaderKey is used
// to canonicalize the provided key.
// If there are no values associated with the key, Get returns "".
// To access multiple values of a key, or to use non-canonical keys,
// access the map directly.
func (h Header) Get(key string) string {
	return textproto.MIMEHeader(h).Get(key)
}

// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when Get returns "" for the requested field.
var ErrHeaderNotPresent = errors.New("mail: header not in message")

// Date parses the Date header field.
// It returns ErrHeaderNotPresent when Get("Date") is empty, so a field
// that is present but empty is reported the same way as a missing one.
func (h Header) Date() (time.Time, error) {
	hdr := h.Get("Date")
	if hdr == "" {
		return time.Time{}, ErrHeaderNotPresent
	}
	return ParseDate(hdr)
}

// AddressList parses the named header field as a list of addresses.
132 func (h Header) AddressList(key string) ([]*Address, error) { 133 hdr := h.Get(key) 134 if hdr == "" { 135 return nil, ErrHeaderNotPresent 136 } 137 return ParseAddressList(hdr) 138 } 139 140 // Address represents a single mail address. 141 // An address such as "Barry Gibbs <bg@example.com>" is represented 142 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 143 type Address struct { 144 Name string // Proper name; may be empty. 145 Address string // user@domain 146 } 147 148 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 149 func ParseAddress(address string) (*Address, error) { 150 return (&addrParser{s: address}).parseSingleAddress() 151 } 152 153 // ParseAddressList parses the given string as a list of addresses. 154 func ParseAddressList(list string) ([]*Address, error) { 155 return (&addrParser{s: list}).parseAddressList() 156 } 157 158 // An AddressParser is an RFC 5322 address parser. 159 type AddressParser struct { 160 // WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. 161 WordDecoder *mime.WordDecoder 162 } 163 164 // Parse parses a single RFC 5322 address of the 165 // form "Gogh Fir <gf@example.com>" or "foo@example.com". 166 func (p *AddressParser) Parse(address string) (*Address, error) { 167 return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() 168 } 169 170 // ParseList parses the given string as a list of comma-separated addresses 171 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com". 172 func (p *AddressParser) ParseList(list string) ([]*Address, error) { 173 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() 174 } 175 176 // String formats the address as a valid RFC 5322 address. 177 // If the address's name contains non-ASCII characters 178 // the name will be rendered according to RFC 2047. 
179 func (a *Address) String() string { 180 // Format address local@domain 181 at := strings.LastIndex(a.Address, "@") 182 var local, domain string 183 if at < 0 { 184 // This is a malformed address ("@" is required in addr-spec); 185 // treat the whole address as local-part. 186 local = a.Address 187 } else { 188 local, domain = a.Address[:at], a.Address[at+1:] 189 } 190 191 // Add quotes if needed 192 quoteLocal := false 193 for i, r := range local { 194 if isAtext(r, false, false) { 195 continue 196 } 197 if r == '.' { 198 // Dots are okay if they are surrounded by atext. 199 // We only need to check that the previous byte is 200 // not a dot, and this isn't the end of the string. 201 if i > 0 && local[i-1] != '.' && i < len(local)-1 { 202 continue 203 } 204 } 205 quoteLocal = true 206 break 207 } 208 if quoteLocal { 209 local = quoteString(local) 210 211 } 212 213 s := "<" + local + "@" + domain + ">" 214 215 if a.Name == "" { 216 return s 217 } 218 219 // If every character is printable ASCII, quoting is simple. 220 allPrintable := true 221 for _, r := range a.Name { 222 // isWSP here should actually be isFWS, 223 // but we don't support folding yet. 224 if !isVchar(r) && !isWSP(r) || isMultibyte(r) { 225 allPrintable = false 226 break 227 } 228 } 229 if allPrintable { 230 return quoteString(a.Name) + " " + s 231 } 232 233 // Text in an encoded-word in a display-name must not contain certain 234 // characters like quotes or parentheses (see RFC 2047 section 5.3). 235 // When this is the case encode the name using base64 encoding. 
236 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { 237 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s 238 } 239 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s 240 } 241 242 type addrParser struct { 243 s string 244 dec *mime.WordDecoder // may be nil 245 } 246 247 func (p *addrParser) parseAddressList() ([]*Address, error) { 248 var list []*Address 249 for { 250 p.skipSpace() 251 addr, err := p.parseAddress() 252 if err != nil { 253 return nil, err 254 } 255 list = append(list, addr) 256 257 p.skipSpace() 258 if p.empty() { 259 break 260 } 261 if !p.consume(',') { 262 return nil, errors.New("mail: expected comma") 263 } 264 } 265 return list, nil 266 } 267 268 func (p *addrParser) parseSingleAddress() (*Address, error) { 269 addr, err := p.parseAddress() 270 if err != nil { 271 return nil, err 272 } 273 p.skipSpace() 274 if !p.empty() { 275 return nil, fmt.Errorf("mail: expected single address, got %q", p.s) 276 } 277 return addr, nil 278 } 279 280 // parseAddress parses a single RFC 5322 address at the start of p. 281 func (p *addrParser) parseAddress() (addr *Address, err error) { 282 debug.Printf("parseAddress: %q", p.s) 283 p.skipSpace() 284 if p.empty() { 285 return nil, errors.New("mail: no address") 286 } 287 288 // address = name-addr / addr-spec 289 // TODO(dsymonds): Support parsing group address. 290 291 // addr-spec has a more restricted grammar than name-addr, 292 // so try parsing it first, and fallback to name-addr. 293 // TODO(dsymonds): Is this really correct? 
294 spec, err := p.consumeAddrSpec() 295 if err == nil { 296 return &Address{ 297 Address: spec, 298 }, err 299 } 300 debug.Printf("parseAddress: not an addr-spec: %v", err) 301 debug.Printf("parseAddress: state is now %q", p.s) 302 303 // display-name 304 var displayName string 305 if p.peek() != '<' { 306 displayName, err = p.consumePhrase() 307 if err != nil { 308 return nil, err 309 } 310 } 311 debug.Printf("parseAddress: displayName=%q", displayName) 312 313 // angle-addr = "<" addr-spec ">" 314 p.skipSpace() 315 if !p.consume('<') { 316 return nil, errors.New("mail: no angle-addr") 317 } 318 spec, err = p.consumeAddrSpec() 319 if err != nil { 320 return nil, err 321 } 322 if !p.consume('>') { 323 return nil, errors.New("mail: unclosed angle-addr") 324 } 325 debug.Printf("parseAddress: spec=%q", spec) 326 327 return &Address{ 328 Name: displayName, 329 Address: spec, 330 }, nil 331 } 332 333 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 334 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 335 debug.Printf("consumeAddrSpec: %q", p.s) 336 337 orig := *p 338 defer func() { 339 if err != nil { 340 *p = orig 341 } 342 }() 343 344 // local-part = dot-atom / quoted-string 345 var localPart string 346 p.skipSpace() 347 if p.empty() { 348 return "", errors.New("mail: no addr-spec") 349 } 350 if p.peek() == '"' { 351 // quoted-string 352 debug.Printf("consumeAddrSpec: parsing quoted-string") 353 localPart, err = p.consumeQuotedString() 354 if localPart == "" { 355 err = errors.New("mail: empty quoted string in addr-spec") 356 } 357 } else { 358 // dot-atom 359 debug.Printf("consumeAddrSpec: parsing dot-atom") 360 localPart, err = p.consumeAtom(true, false) 361 } 362 if err != nil { 363 debug.Printf("consumeAddrSpec: failed: %v", err) 364 return "", err 365 } 366 367 if !p.consume('@') { 368 return "", errors.New("mail: missing @ in addr-spec") 369 } 370 371 // domain = dot-atom / domain-literal 372 var domain string 373 
p.skipSpace() 374 if p.empty() { 375 return "", errors.New("mail: no domain in addr-spec") 376 } 377 // TODO(dsymonds): Handle domain-literal 378 domain, err = p.consumeAtom(true, false) 379 if err != nil { 380 return "", err 381 } 382 383 return localPart + "@" + domain, nil 384 } 385 386 // consumePhrase parses the RFC 5322 phrase at the start of p. 387 func (p *addrParser) consumePhrase() (phrase string, err error) { 388 debug.Printf("consumePhrase: [%s]", p.s) 389 // phrase = 1*word 390 var words []string 391 var isPrevEncoded bool 392 for { 393 // word = atom / quoted-string 394 var word string 395 p.skipSpace() 396 if p.empty() { 397 break 398 } 399 isEncoded := false 400 if p.peek() == '"' { 401 // quoted-string 402 word, err = p.consumeQuotedString() 403 } else { 404 // atom 405 // We actually parse dot-atom here to be more permissive 406 // than what RFC 5322 specifies. 407 word, err = p.consumeAtom(true, true) 408 if err == nil { 409 word, isEncoded, err = p.decodeRFC2047Word(word) 410 } 411 } 412 413 if err != nil { 414 break 415 } 416 debug.Printf("consumePhrase: consumed %q", word) 417 if isPrevEncoded && isEncoded { 418 words[len(words)-1] += word 419 } else { 420 words = append(words, word) 421 } 422 isPrevEncoded = isEncoded 423 } 424 // Ignore any error if we got at least one word. 425 if err != nil && len(words) == 0 { 426 debug.Printf("consumePhrase: hit err: %v", err) 427 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 428 } 429 phrase = strings.Join(words, " ") 430 return phrase, nil 431 } 432 433 // consumeQuotedString parses the quoted string at the start of p. 434 func (p *addrParser) consumeQuotedString() (qs string, err error) { 435 // Assume first byte is '"'. 
436 i := 1 437 qsb := make([]rune, 0, 10) 438 439 escaped := false 440 441 Loop: 442 for { 443 r, size := utf8.DecodeRuneInString(p.s[i:]) 444 445 switch { 446 case size == 0: 447 return "", errors.New("mail: unclosed quoted-string") 448 449 case size == 1 && r == utf8.RuneError: 450 return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s) 451 452 case escaped: 453 // quoted-pair = ("\" (VCHAR / WSP)) 454 455 if !isVchar(r) && !isWSP(r) { 456 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 457 } 458 459 qsb = append(qsb, r) 460 escaped = false 461 462 case isQtext(r) || isWSP(r): 463 // qtext (printable US-ASCII excluding " and \), or 464 // FWS (almost; we're ignoring CRLF) 465 qsb = append(qsb, r) 466 467 case r == '"': 468 break Loop 469 470 case r == '\\': 471 escaped = true 472 473 default: 474 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 475 476 } 477 478 i += size 479 } 480 p.s = p.s[i+1:] 481 return string(qsb), nil 482 } 483 484 // consumeAtom parses an RFC 5322 atom at the start of p. 485 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 
486 // If permissive is true, consumeAtom will not fail on: 487 // - leading/trailing/double dots in the atom (see golang.org/issue/4938) 488 // - special characters (RFC 5322 3.2.3) except '<', '>' and '"' (see golang.org/issue/21018) 489 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { 490 i := 0 491 492 Loop: 493 for { 494 r, size := utf8.DecodeRuneInString(p.s[i:]) 495 switch { 496 case size == 1 && r == utf8.RuneError: 497 return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s) 498 499 case size == 0 || !isAtext(r, dot, permissive): 500 break Loop 501 502 default: 503 i += size 504 505 } 506 } 507 508 if i == 0 { 509 return "", errors.New("mail: invalid string") 510 } 511 atom, p.s = p.s[:i], p.s[i:] 512 if !permissive { 513 if strings.HasPrefix(atom, ".") { 514 return "", errors.New("mail: leading dot in atom") 515 } 516 if strings.Contains(atom, "..") { 517 return "", errors.New("mail: double dot in atom") 518 } 519 if strings.HasSuffix(atom, ".") { 520 return "", errors.New("mail: trailing dot in atom") 521 } 522 } 523 return atom, nil 524 } 525 526 func (p *addrParser) consume(c byte) bool { 527 if p.empty() || p.peek() != c { 528 return false 529 } 530 p.s = p.s[1:] 531 return true 532 } 533 534 // skipSpace skips the leading space and tab characters. 
535 func (p *addrParser) skipSpace() { 536 p.s = strings.TrimLeft(p.s, " \t") 537 } 538 539 func (p *addrParser) peek() byte { 540 return p.s[0] 541 } 542 543 func (p *addrParser) empty() bool { 544 return p.len() == 0 545 } 546 547 func (p *addrParser) len() int { 548 return len(p.s) 549 } 550 551 func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) { 552 if p.dec != nil { 553 word, err = p.dec.Decode(s) 554 } else { 555 word, err = rfc2047Decoder.Decode(s) 556 } 557 558 if err == nil { 559 return word, true, nil 560 } 561 562 if _, ok := err.(charsetError); ok { 563 return s, true, err 564 } 565 566 // Ignore invalid RFC 2047 encoded-word errors. 567 return s, false, nil 568 } 569 570 var rfc2047Decoder = mime.WordDecoder{ 571 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { 572 return nil, charsetError(charset) 573 }, 574 } 575 576 type charsetError string 577 578 func (e charsetError) Error() string { 579 return fmt.Sprintf("charset not supported: %q", string(e)) 580 } 581 582 // isAtext reports whether r is an RFC 5322 atext character. 583 // If dot is true, period is included. 584 // If permissive is true, RFC 5322 3.2.3 specials is included, 585 // except '<', '>' and '"'. 586 func isAtext(r rune, dot, permissive bool) bool { 587 switch r { 588 case '.': 589 return dot 590 591 // RFC 5322 3.2.3. specials 592 case '(', ')', '[', ']', ':', ';', '@', '\\', ',': 593 return permissive 594 595 case '<', '>', '"': 596 return false 597 } 598 return isVchar(r) 599 } 600 601 // isQtext reports whether r is an RFC 5322 qtext character. 602 func isQtext(r rune) bool { 603 // Printable US-ASCII, excluding backslash or quote. 604 if r == '\\' || r == '"' { 605 return false 606 } 607 return isVchar(r) 608 } 609 610 // quoteString renders a string as an RFC 5322 quoted-string. 
611 func quoteString(s string) string { 612 var buf bytes.Buffer 613 buf.WriteByte('"') 614 for _, r := range s { 615 if isQtext(r) || isWSP(r) { 616 buf.WriteRune(r) 617 } else if isVchar(r) { 618 buf.WriteByte('\\') 619 buf.WriteRune(r) 620 } 621 } 622 buf.WriteByte('"') 623 return buf.String() 624 } 625 626 // isVchar reports whether r is an RFC 5322 VCHAR character. 627 func isVchar(r rune) bool { 628 // Visible (printing) characters. 629 return '!' <= r && r <= '~' || isMultibyte(r) 630 } 631 632 // isMultibyte reports whether r is a multi-byte UTF-8 character 633 // as supported by RFC 6532 634 func isMultibyte(r rune) bool { 635 return r >= utf8.RuneSelf 636 } 637 638 // isWSP reports whether r is a WSP (white space). 639 // WSP is a space or horizontal tab (RFC 5234 Appendix B). 640 func isWSP(r rune) bool { 641 return r == ' ' || r == '\t' 642 }