github.com/panjjo/go@v0.0.0-20161104043856-d62b31386338/src/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322 and 9 extended by RFC 6532. 10 Notable divergences: 11 * Obsolete address formats are not parsed, including addresses with 12 embedded route information. 13 * Group addresses are not parsed. 14 * The full range of spacing (the CFWS syntax element) is not supported, 15 such as breaking addresses across lines. 16 * No unicode normalization is performed. 17 */ 18 package mail 19 20 import ( 21 "bufio" 22 "bytes" 23 "errors" 24 "fmt" 25 "io" 26 "log" 27 "mime" 28 "net/textproto" 29 "strings" 30 "time" 31 "unicode/utf8" 32 ) 33 34 var debug = debugT(false) 35 36 type debugT bool 37 38 func (d debugT) Printf(format string, args ...interface{}) { 39 if d { 40 log.Printf(format, args...) 41 } 42 } 43 44 // A Message represents a parsed mail message. 45 type Message struct { 46 Header Header 47 Body io.Reader 48 } 49 50 // ReadMessage reads a message from r. 51 // The headers are parsed, and the body of the message will be available 52 // for reading from r. 53 func ReadMessage(r io.Reader) (msg *Message, err error) { 54 tp := textproto.NewReader(bufio.NewReader(r)) 55 56 hdr, err := tp.ReadMIMEHeader() 57 if err != nil { 58 return nil, err 59 } 60 61 return &Message{ 62 Header: Header(hdr), 63 Body: tp.R, 64 }, nil 65 } 66 67 // Layouts suitable for passing to time.Parse. 68 // These are tried in order. 69 var dateLayouts []string 70 71 func init() { 72 // Generate layouts based on RFC 5322, section 3.3. 73 74 dows := [...]string{"", "Mon, "} // day-of-week 75 days := [...]string{"2", "02"} // day = 1*2DIGIT 76 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 77 seconds := [...]string{":05", ""} // second 78 // "-0700 (MST)" is not in RFC 5322, but is common. 79 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 80 81 for _, dow := range dows { 82 for _, day := range days { 83 for _, year := range years { 84 for _, second := range seconds { 85 for _, zone := range zones { 86 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 87 dateLayouts = append(dateLayouts, s) 88 } 89 } 90 } 91 } 92 } 93 } 94 95 // ParseDate parses an RFC 5322 date string. 96 func ParseDate(date string) (time.Time, error) { 97 for _, layout := range dateLayouts { 98 t, err := time.Parse(layout, date) 99 if err == nil { 100 return t, nil 101 } 102 } 103 return time.Time{}, errors.New("mail: header could not be parsed") 104 } 105 106 // A Header represents the key-value pairs in a mail message header. 107 type Header map[string][]string 108 109 // Get gets the first value associated with the given key. 110 // If there are no values associated with the key, Get returns "". 111 func (h Header) Get(key string) string { 112 return textproto.MIMEHeader(h).Get(key) 113 } 114 115 var ErrHeaderNotPresent = errors.New("mail: header not in message") 116 117 // Date parses the Date header field. 118 func (h Header) Date() (time.Time, error) { 119 hdr := h.Get("Date") 120 if hdr == "" { 121 return time.Time{}, ErrHeaderNotPresent 122 } 123 return ParseDate(hdr) 124 } 125 126 // AddressList parses the named header field as a list of addresses. 127 func (h Header) AddressList(key string) ([]*Address, error) { 128 hdr := h.Get(key) 129 if hdr == "" { 130 return nil, ErrHeaderNotPresent 131 } 132 return ParseAddressList(hdr) 133 } 134 135 // Address represents a single mail address. 136 // An address such as "Barry Gibbs <bg@example.com>" is represented 137 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 138 type Address struct { 139 Name string // Proper name; may be empty. 140 Address string // user@domain 141 } 142 143 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 144 func ParseAddress(address string) (*Address, error) { 145 return (&addrParser{s: address}).parseSingleAddress() 146 } 147 148 // ParseAddressList parses the given string as a list of addresses. 149 func ParseAddressList(list string) ([]*Address, error) { 150 return (&addrParser{s: list}).parseAddressList() 151 } 152 153 // An AddressParser is an RFC 5322 address parser. 154 type AddressParser struct { 155 // WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. 156 WordDecoder *mime.WordDecoder 157 } 158 159 // Parse parses a single RFC 5322 address of the 160 // form "Gogh Fir <gf@example.com>" or "foo@example.com". 161 func (p *AddressParser) Parse(address string) (*Address, error) { 162 return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() 163 } 164 165 // ParseList parses the given string as a list of comma-separated addresses 166 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com". 167 func (p *AddressParser) ParseList(list string) ([]*Address, error) { 168 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() 169 } 170 171 // String formats the address as a valid RFC 5322 address. 172 // If the address's name contains non-ASCII characters 173 // the name will be rendered according to RFC 2047. 174 func (a *Address) String() string { 175 // Format address local@domain 176 at := strings.LastIndex(a.Address, "@") 177 var local, domain string 178 if at < 0 { 179 // This is a malformed address ("@" is required in addr-spec); 180 // treat the whole address as local-part. 181 local = a.Address 182 } else { 183 local, domain = a.Address[:at], a.Address[at+1:] 184 } 185 186 // Add quotes if needed 187 quoteLocal := false 188 for i, r := range local { 189 if isAtext(r, false) { 190 continue 191 } 192 if r == '.' { 193 // Dots are okay if they are surrounded by atext. 194 // We only need to check that the previous byte is 195 // not a dot, and this isn't the end of the string. 196 if i > 0 && local[i-1] != '.' && i < len(local)-1 { 197 continue 198 } 199 } 200 quoteLocal = true 201 break 202 } 203 if quoteLocal { 204 local = quoteString(local) 205 206 } 207 208 s := "<" + local + "@" + domain + ">" 209 210 if a.Name == "" { 211 return s 212 } 213 214 // If every character is printable ASCII, quoting is simple. 215 allPrintable := true 216 for _, r := range a.Name { 217 // isWSP here should actually be isFWS, 218 // but we don't support folding yet. 219 if !isVchar(r) && !isWSP(r) || isMultibyte(r) { 220 allPrintable = false 221 break 222 } 223 } 224 if allPrintable { 225 return quoteString(a.Name) + " " + s 226 } 227 228 // Text in an encoded-word in a display-name must not contain certain 229 // characters like quotes or parentheses (see RFC 2047 section 5.3). 230 // When this is the case encode the name using base64 encoding. 231 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { 232 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s 233 } 234 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s 235 } 236 237 type addrParser struct { 238 s string 239 dec *mime.WordDecoder // may be nil 240 } 241 242 func (p *addrParser) parseAddressList() ([]*Address, error) { 243 var list []*Address 244 for { 245 p.skipSpace() 246 addr, err := p.parseAddress() 247 if err != nil { 248 return nil, err 249 } 250 list = append(list, addr) 251 252 p.skipSpace() 253 if p.empty() { 254 break 255 } 256 if !p.consume(',') { 257 return nil, errors.New("mail: expected comma") 258 } 259 } 260 return list, nil 261 } 262 263 func (p *addrParser) parseSingleAddress() (*Address, error) { 264 addr, err := p.parseAddress() 265 if err != nil { 266 return nil, err 267 } 268 p.skipSpace() 269 if !p.empty() { 270 return nil, fmt.Errorf("mail: expected single address, got %q", p.s) 271 } 272 return addr, nil 273 } 274 275 // parseAddress parses a single RFC 5322 address at the start of p. 276 func (p *addrParser) parseAddress() (addr *Address, err error) { 277 debug.Printf("parseAddress: %q", p.s) 278 p.skipSpace() 279 if p.empty() { 280 return nil, errors.New("mail: no address") 281 } 282 283 // address = name-addr / addr-spec 284 // TODO(dsymonds): Support parsing group address. 285 286 // addr-spec has a more restricted grammar than name-addr, 287 // so try parsing it first, and fallback to name-addr. 288 // TODO(dsymonds): Is this really correct? 289 spec, err := p.consumeAddrSpec() 290 if err == nil { 291 return &Address{ 292 Address: spec, 293 }, err 294 } 295 debug.Printf("parseAddress: not an addr-spec: %v", err) 296 debug.Printf("parseAddress: state is now %q", p.s) 297 298 // display-name 299 var displayName string 300 if p.peek() != '<' { 301 displayName, err = p.consumePhrase() 302 if err != nil { 303 return nil, err 304 } 305 } 306 debug.Printf("parseAddress: displayName=%q", displayName) 307 308 // angle-addr = "<" addr-spec ">" 309 p.skipSpace() 310 if !p.consume('<') { 311 return nil, errors.New("mail: no angle-addr") 312 } 313 spec, err = p.consumeAddrSpec() 314 if err != nil { 315 return nil, err 316 } 317 if !p.consume('>') { 318 return nil, errors.New("mail: unclosed angle-addr") 319 } 320 debug.Printf("parseAddress: spec=%q", spec) 321 322 return &Address{ 323 Name: displayName, 324 Address: spec, 325 }, nil 326 } 327 328 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 329 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 330 debug.Printf("consumeAddrSpec: %q", p.s) 331 332 orig := *p 333 defer func() { 334 if err != nil { 335 *p = orig 336 } 337 }() 338 339 // local-part = dot-atom / quoted-string 340 var localPart string 341 p.skipSpace() 342 if p.empty() { 343 return "", errors.New("mail: no addr-spec") 344 } 345 if p.peek() == '"' { 346 // quoted-string 347 debug.Printf("consumeAddrSpec: parsing quoted-string") 348 localPart, err = p.consumeQuotedString() 349 if localPart == "" { 350 err = errors.New("mail: empty quoted string in addr-spec") 351 } 352 } else { 353 // dot-atom 354 debug.Printf("consumeAddrSpec: parsing dot-atom") 355 localPart, err = p.consumeAtom(true, false) 356 } 357 if err != nil { 358 debug.Printf("consumeAddrSpec: failed: %v", err) 359 return "", err 360 } 361 362 if !p.consume('@') { 363 return "", errors.New("mail: missing @ in addr-spec") 364 } 365 366 // domain = dot-atom / domain-literal 367 var domain string 368 p.skipSpace() 369 if p.empty() { 370 return "", errors.New("mail: no domain in addr-spec") 371 } 372 // TODO(dsymonds): Handle domain-literal 373 domain, err = p.consumeAtom(true, false) 374 if err != nil { 375 return "", err 376 } 377 378 return localPart + "@" + domain, nil 379 } 380 381 // consumePhrase parses the RFC 5322 phrase at the start of p. 382 func (p *addrParser) consumePhrase() (phrase string, err error) { 383 debug.Printf("consumePhrase: [%s]", p.s) 384 // phrase = 1*word 385 var words []string 386 for { 387 // word = atom / quoted-string 388 var word string 389 p.skipSpace() 390 if p.empty() { 391 return "", errors.New("mail: missing phrase") 392 } 393 if p.peek() == '"' { 394 // quoted-string 395 word, err = p.consumeQuotedString() 396 } else { 397 // atom 398 // We actually parse dot-atom here to be more permissive 399 // than what RFC 5322 specifies. 400 word, err = p.consumeAtom(true, true) 401 if err == nil { 402 word, err = p.decodeRFC2047Word(word) 403 } 404 } 405 406 if err != nil { 407 break 408 } 409 debug.Printf("consumePhrase: consumed %q", word) 410 words = append(words, word) 411 } 412 // Ignore any error if we got at least one word. 413 if err != nil && len(words) == 0 { 414 debug.Printf("consumePhrase: hit err: %v", err) 415 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 416 } 417 phrase = strings.Join(words, " ") 418 return phrase, nil 419 } 420 421 // consumeQuotedString parses the quoted string at the start of p. 422 func (p *addrParser) consumeQuotedString() (qs string, err error) { 423 // Assume first byte is '"'. 424 i := 1 425 qsb := make([]rune, 0, 10) 426 427 escaped := false 428 429 Loop: 430 for { 431 r, size := utf8.DecodeRuneInString(p.s[i:]) 432 433 switch { 434 case size == 0: 435 return "", errors.New("mail: unclosed quoted-string") 436 437 case size == 1 && r == utf8.RuneError: 438 return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s) 439 440 case escaped: 441 // quoted-pair = ("\" (VCHAR / WSP)) 442 443 if !isVchar(r) && !isWSP(r) { 444 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 445 } 446 447 qsb = append(qsb, r) 448 escaped = false 449 450 case isQtext(r) || isWSP(r): 451 // qtext (printable US-ASCII excluding " and \), or 452 // FWS (almost; we're ignoring CRLF) 453 qsb = append(qsb, r) 454 455 case r == '"': 456 break Loop 457 458 case r == '\\': 459 escaped = true 460 461 default: 462 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 463 464 } 465 466 i += size 467 } 468 p.s = p.s[i+1:] 469 return string(qsb), nil 470 } 471 472 // consumeAtom parses an RFC 5322 atom at the start of p. 473 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 474 // If permissive is true, consumeAtom will not fail on 475 // leading/trailing/double dots in the atom (see golang.org/issue/4938). 476 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { 477 i := 0 478 479 Loop: 480 for { 481 r, size := utf8.DecodeRuneInString(p.s[i:]) 482 483 switch { 484 case size == 1 && r == utf8.RuneError: 485 return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s) 486 487 case size == 0 || !isAtext(r, dot): 488 break Loop 489 490 default: 491 i += size 492 493 } 494 } 495 496 if i == 0 { 497 return "", errors.New("mail: invalid string") 498 } 499 atom, p.s = p.s[:i], p.s[i:] 500 if !permissive { 501 if strings.HasPrefix(atom, ".") { 502 return "", errors.New("mail: leading dot in atom") 503 } 504 if strings.Contains(atom, "..") { 505 return "", errors.New("mail: double dot in atom") 506 } 507 if strings.HasSuffix(atom, ".") { 508 return "", errors.New("mail: trailing dot in atom") 509 } 510 } 511 return atom, nil 512 } 513 514 func (p *addrParser) consume(c byte) bool { 515 if p.empty() || p.peek() != c { 516 return false 517 } 518 p.s = p.s[1:] 519 return true 520 } 521 522 // skipSpace skips the leading space and tab characters. 523 func (p *addrParser) skipSpace() { 524 p.s = strings.TrimLeft(p.s, " \t") 525 } 526 527 func (p *addrParser) peek() byte { 528 return p.s[0] 529 } 530 531 func (p *addrParser) empty() bool { 532 return p.len() == 0 533 } 534 535 func (p *addrParser) len() int { 536 return len(p.s) 537 } 538 539 func (p *addrParser) decodeRFC2047Word(s string) (string, error) { 540 if p.dec != nil { 541 return p.dec.DecodeHeader(s) 542 } 543 544 dec, err := rfc2047Decoder.Decode(s) 545 if err == nil { 546 return dec, nil 547 } 548 549 if _, ok := err.(charsetError); ok { 550 return s, err 551 } 552 553 // Ignore invalid RFC 2047 encoded-word errors. 554 return s, nil 555 } 556 557 var rfc2047Decoder = mime.WordDecoder{ 558 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { 559 return nil, charsetError(charset) 560 }, 561 } 562 563 type charsetError string 564 565 func (e charsetError) Error() string { 566 return fmt.Sprintf("charset not supported: %q", string(e)) 567 } 568 569 // isAtext reports whether r is an RFC 5322 atext character. 570 // If dot is true, period is included. 571 func isAtext(r rune, dot bool) bool { 572 switch r { 573 case '.': 574 return dot 575 576 case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials 577 return false 578 } 579 return isVchar(r) 580 } 581 582 // isQtext reports whether r is an RFC 5322 qtext character. 583 func isQtext(r rune) bool { 584 // Printable US-ASCII, excluding backslash or quote. 585 if r == '\\' || r == '"' { 586 return false 587 } 588 return isVchar(r) 589 } 590 591 // quoteString renders a string as an RFC 5322 quoted-string. 592 func quoteString(s string) string { 593 var buf bytes.Buffer 594 buf.WriteByte('"') 595 for _, r := range s { 596 if isQtext(r) || isWSP(r) { 597 buf.WriteRune(r) 598 } else if isVchar(r) { 599 buf.WriteByte('\\') 600 buf.WriteRune(r) 601 } 602 } 603 buf.WriteByte('"') 604 return buf.String() 605 } 606 607 // isVchar reports whether r is an RFC 5322 VCHAR character. 608 func isVchar(r rune) bool { 609 // Visible (printing) characters. 610 return '!' <= r && r <= '~' || isMultibyte(r) 611 } 612 613 // isMultibyte reports whether r is a multi-byte UTF-8 character 614 // as supported by RFC 6532 615 func isMultibyte(r rune) bool { 616 return r >= utf8.RuneSelf 617 } 618 619 // isWSP reports whether r is a WSP (white space). 620 // WSP is a space or horizontal tab (RFC 5234 Appendix B). 621 func isWSP(r rune) bool { 622 return r == ' ' || r == '\t' 623 }