github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322 and 9 extended by RFC 6532. 10 Notable divergences: 11 * Obsolete address formats are not parsed, including addresses with 12 embedded route information. 13 * Group addresses are not parsed. 14 * The full range of spacing (the CFWS syntax element) is not supported, 15 such as breaking addresses across lines. 16 * No unicode normalization is performed. 17 */ 18 package mail 19 20 import ( 21 "bufio" 22 "bytes" 23 "errors" 24 "fmt" 25 "io" 26 "log" 27 "mime" 28 "net/textproto" 29 "strings" 30 "time" 31 "unicode/utf8" 32 ) 33 34 var debug = debugT(false) 35 36 type debugT bool 37 38 func (d debugT) Printf(format string, args ...interface{}) { 39 if d { 40 log.Printf(format, args...) 41 } 42 } 43 44 // A Message represents a parsed mail message. 45 type Message struct { 46 Header Header 47 Body io.Reader 48 } 49 50 // ReadMessage reads a message from r. 51 // The headers are parsed, and the body of the message will be available 52 // for reading from r. 53 func ReadMessage(r io.Reader) (msg *Message, err error) { 54 tp := textproto.NewReader(bufio.NewReader(r)) 55 56 hdr, err := tp.ReadMIMEHeader() 57 if err != nil { 58 return nil, err 59 } 60 61 return &Message{ 62 Header: Header(hdr), 63 Body: tp.R, 64 }, nil 65 } 66 67 // Layouts suitable for passing to time.Parse. 68 // These are tried in order. 69 var dateLayouts []string 70 71 func init() { 72 // Generate layouts based on RFC 5322, section 3.3. 73 74 dows := [...]string{"", "Mon, "} // day-of-week 75 days := [...]string{"2", "02"} // day = 1*2DIGIT 76 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 77 seconds := [...]string{":05", ""} // second 78 // "-0700 (MST)" is not in RFC 5322, but is common. 79 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 80 81 for _, dow := range dows { 82 for _, day := range days { 83 for _, year := range years { 84 for _, second := range seconds { 85 for _, zone := range zones { 86 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 87 dateLayouts = append(dateLayouts, s) 88 } 89 } 90 } 91 } 92 } 93 } 94 95 func parseDate(date string) (time.Time, error) { 96 for _, layout := range dateLayouts { 97 t, err := time.Parse(layout, date) 98 if err == nil { 99 return t, nil 100 } 101 } 102 return time.Time{}, errors.New("mail: header could not be parsed") 103 } 104 105 // A Header represents the key-value pairs in a mail message header. 106 type Header map[string][]string 107 108 // Get gets the first value associated with the given key. 109 // If there are no values associated with the key, Get returns "". 110 func (h Header) Get(key string) string { 111 return textproto.MIMEHeader(h).Get(key) 112 } 113 114 var ErrHeaderNotPresent = errors.New("mail: header not in message") 115 116 // Date parses the Date header field. 117 func (h Header) Date() (time.Time, error) { 118 hdr := h.Get("Date") 119 if hdr == "" { 120 return time.Time{}, ErrHeaderNotPresent 121 } 122 return parseDate(hdr) 123 } 124 125 // AddressList parses the named header field as a list of addresses. 126 func (h Header) AddressList(key string) ([]*Address, error) { 127 hdr := h.Get(key) 128 if hdr == "" { 129 return nil, ErrHeaderNotPresent 130 } 131 return ParseAddressList(hdr) 132 } 133 134 // Address represents a single mail address. 135 // An address such as "Barry Gibbs <bg@example.com>" is represented 136 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 137 type Address struct { 138 Name string // Proper name; may be empty. 139 Address string // user@domain 140 } 141 142 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 143 func ParseAddress(address string) (*Address, error) { 144 return (&addrParser{s: address}).parseSingleAddress() 145 } 146 147 // ParseAddressList parses the given string as a list of addresses. 148 func ParseAddressList(list string) ([]*Address, error) { 149 return (&addrParser{s: list}).parseAddressList() 150 } 151 152 // An AddressParser is an RFC 5322 address parser. 153 type AddressParser struct { 154 // WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. 155 WordDecoder *mime.WordDecoder 156 } 157 158 // Parse parses a single RFC 5322 address of the 159 // form "Gogh Fir <gf@example.com>" or "foo@example.com". 160 func (p *AddressParser) Parse(address string) (*Address, error) { 161 return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() 162 } 163 164 // ParseList parses the given string as a list of comma-separated addresses 165 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com". 166 func (p *AddressParser) ParseList(list string) ([]*Address, error) { 167 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() 168 } 169 170 // String formats the address as a valid RFC 5322 address. 171 // If the address's name contains non-ASCII characters 172 // the name will be rendered according to RFC 2047. 173 func (a *Address) String() string { 174 // Format address local@domain 175 at := strings.LastIndex(a.Address, "@") 176 var local, domain string 177 if at < 0 { 178 // This is a malformed address ("@" is required in addr-spec); 179 // treat the whole address as local-part. 180 local = a.Address 181 } else { 182 local, domain = a.Address[:at], a.Address[at+1:] 183 } 184 185 // Add quotes if needed 186 quoteLocal := false 187 for i, r := range local { 188 if isAtext(r, false) { 189 continue 190 } 191 if r == '.' { 192 // Dots are okay if they are surrounded by atext. 193 // We only need to check that the previous byte is 194 // not a dot, and this isn't the end of the string. 195 if i > 0 && local[i-1] != '.' && i < len(local)-1 { 196 continue 197 } 198 } 199 quoteLocal = true 200 break 201 } 202 if quoteLocal { 203 local = quoteString(local) 204 205 } 206 207 s := "<" + local + "@" + domain + ">" 208 209 if a.Name == "" { 210 return s 211 } 212 213 // If every character is printable ASCII, quoting is simple. 214 allPrintable := true 215 for _, r := range a.Name { 216 // isWSP here should actually be isFWS, 217 // but we don't support folding yet. 218 if !isVchar(r) && !isWSP(r) || isMultibyte(r) { 219 allPrintable = false 220 break 221 } 222 } 223 if allPrintable { 224 return quoteString(a.Name) + " " + s 225 } 226 227 // Text in an encoded-word in a display-name must not contain certain 228 // characters like quotes or parentheses (see RFC 2047 section 5.3). 229 // When this is the case encode the name using base64 encoding. 230 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { 231 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s 232 } 233 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s 234 } 235 236 type addrParser struct { 237 s string 238 dec *mime.WordDecoder // may be nil 239 } 240 241 func (p *addrParser) parseAddressList() ([]*Address, error) { 242 var list []*Address 243 for { 244 p.skipSpace() 245 addr, err := p.parseAddress() 246 if err != nil { 247 return nil, err 248 } 249 list = append(list, addr) 250 251 p.skipSpace() 252 if p.empty() { 253 break 254 } 255 if !p.consume(',') { 256 return nil, errors.New("mail: expected comma") 257 } 258 } 259 return list, nil 260 } 261 262 func (p *addrParser) parseSingleAddress() (*Address, error) { 263 addr, err := p.parseAddress() 264 if err != nil { 265 return nil, err 266 } 267 p.skipSpace() 268 if !p.empty() { 269 return nil, fmt.Errorf("mail: expected single address, got %q", p.s) 270 } 271 return addr, nil 272 } 273 274 // parseAddress parses a single RFC 5322 address at the start of p. 275 func (p *addrParser) parseAddress() (addr *Address, err error) { 276 debug.Printf("parseAddress: %q", p.s) 277 p.skipSpace() 278 if p.empty() { 279 return nil, errors.New("mail: no address") 280 } 281 282 // address = name-addr / addr-spec 283 // TODO(dsymonds): Support parsing group address. 284 285 // addr-spec has a more restricted grammar than name-addr, 286 // so try parsing it first, and fallback to name-addr. 287 // TODO(dsymonds): Is this really correct? 288 spec, err := p.consumeAddrSpec() 289 if err == nil { 290 return &Address{ 291 Address: spec, 292 }, err 293 } 294 debug.Printf("parseAddress: not an addr-spec: %v", err) 295 debug.Printf("parseAddress: state is now %q", p.s) 296 297 // display-name 298 var displayName string 299 if p.peek() != '<' { 300 displayName, err = p.consumePhrase() 301 if err != nil { 302 return nil, err 303 } 304 } 305 debug.Printf("parseAddress: displayName=%q", displayName) 306 307 // angle-addr = "<" addr-spec ">" 308 p.skipSpace() 309 if !p.consume('<') { 310 return nil, errors.New("mail: no angle-addr") 311 } 312 spec, err = p.consumeAddrSpec() 313 if err != nil { 314 return nil, err 315 } 316 if !p.consume('>') { 317 return nil, errors.New("mail: unclosed angle-addr") 318 } 319 debug.Printf("parseAddress: spec=%q", spec) 320 321 return &Address{ 322 Name: displayName, 323 Address: spec, 324 }, nil 325 } 326 327 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 328 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 329 debug.Printf("consumeAddrSpec: %q", p.s) 330 331 orig := *p 332 defer func() { 333 if err != nil { 334 *p = orig 335 } 336 }() 337 338 // local-part = dot-atom / quoted-string 339 var localPart string 340 p.skipSpace() 341 if p.empty() { 342 return "", errors.New("mail: no addr-spec") 343 } 344 if p.peek() == '"' { 345 // quoted-string 346 debug.Printf("consumeAddrSpec: parsing quoted-string") 347 localPart, err = p.consumeQuotedString() 348 } else { 349 // dot-atom 350 debug.Printf("consumeAddrSpec: parsing dot-atom") 351 localPart, err = p.consumeAtom(true, false) 352 } 353 if err != nil { 354 debug.Printf("consumeAddrSpec: failed: %v", err) 355 return "", err 356 } 357 358 if !p.consume('@') { 359 return "", errors.New("mail: missing @ in addr-spec") 360 } 361 362 // domain = dot-atom / domain-literal 363 var domain string 364 p.skipSpace() 365 if p.empty() { 366 return "", errors.New("mail: no domain in addr-spec") 367 } 368 // TODO(dsymonds): Handle domain-literal 369 domain, err = p.consumeAtom(true, false) 370 if err != nil { 371 return "", err 372 } 373 374 return localPart + "@" + domain, nil 375 } 376 377 // consumePhrase parses the RFC 5322 phrase at the start of p. 378 func (p *addrParser) consumePhrase() (phrase string, err error) { 379 debug.Printf("consumePhrase: [%s]", p.s) 380 // phrase = 1*word 381 var words []string 382 for { 383 // word = atom / quoted-string 384 var word string 385 p.skipSpace() 386 if p.empty() { 387 return "", errors.New("mail: missing phrase") 388 } 389 if p.peek() == '"' { 390 // quoted-string 391 word, err = p.consumeQuotedString() 392 } else { 393 // atom 394 // We actually parse dot-atom here to be more permissive 395 // than what RFC 5322 specifies. 396 word, err = p.consumeAtom(true, true) 397 if err == nil { 398 word, err = p.decodeRFC2047Word(word) 399 } 400 } 401 402 if err != nil { 403 break 404 } 405 debug.Printf("consumePhrase: consumed %q", word) 406 words = append(words, word) 407 } 408 // Ignore any error if we got at least one word. 409 if err != nil && len(words) == 0 { 410 debug.Printf("consumePhrase: hit err: %v", err) 411 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 412 } 413 phrase = strings.Join(words, " ") 414 return phrase, nil 415 } 416 417 // consumeQuotedString parses the quoted string at the start of p. 418 func (p *addrParser) consumeQuotedString() (qs string, err error) { 419 // Assume first byte is '"'. 420 i := 1 421 qsb := make([]rune, 0, 10) 422 423 escaped := false 424 425 Loop: 426 for { 427 r, size := utf8.DecodeRuneInString(p.s[i:]) 428 429 switch { 430 case size == 0: 431 return "", errors.New("mail: unclosed quoted-string") 432 433 case size == 1 && r == utf8.RuneError: 434 return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s) 435 436 case escaped: 437 // quoted-pair = ("\" (VCHAR / WSP)) 438 439 if !isVchar(r) && !isWSP(r) { 440 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 441 } 442 443 qsb = append(qsb, r) 444 escaped = false 445 446 case isQtext(r) || isWSP(r): 447 // qtext (printable US-ASCII excluding " and \), or 448 // FWS (almost; we're ignoring CRLF) 449 qsb = append(qsb, r) 450 451 case r == '"': 452 break Loop 453 454 case r == '\\': 455 escaped = true 456 457 default: 458 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) 459 460 } 461 462 i += size 463 } 464 p.s = p.s[i+1:] 465 if len(qsb) == 0 { 466 return "", errors.New("mail: empty quoted-string") 467 } 468 return string(qsb), nil 469 } 470 471 // consumeAtom parses an RFC 5322 atom at the start of p. 472 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 473 // If permissive is true, consumeAtom will not fail on 474 // leading/trailing/double dots in the atom (see golang.org/issue/4938). 475 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { 476 i := 0 477 478 Loop: 479 for { 480 r, size := utf8.DecodeRuneInString(p.s[i:]) 481 482 switch { 483 case size == 1 && r == utf8.RuneError: 484 return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s) 485 486 case size == 0 || !isAtext(r, dot): 487 break Loop 488 489 default: 490 i += size 491 492 } 493 } 494 495 if i == 0 { 496 return "", errors.New("mail: invalid string") 497 } 498 atom, p.s = p.s[:i], p.s[i:] 499 if !permissive { 500 if strings.HasPrefix(atom, ".") { 501 return "", errors.New("mail: leading dot in atom") 502 } 503 if strings.Contains(atom, "..") { 504 return "", errors.New("mail: double dot in atom") 505 } 506 if strings.HasSuffix(atom, ".") { 507 return "", errors.New("mail: trailing dot in atom") 508 } 509 } 510 return atom, nil 511 } 512 513 func (p *addrParser) consume(c byte) bool { 514 if p.empty() || p.peek() != c { 515 return false 516 } 517 p.s = p.s[1:] 518 return true 519 } 520 521 // skipSpace skips the leading space and tab characters. 522 func (p *addrParser) skipSpace() { 523 p.s = strings.TrimLeft(p.s, " \t") 524 } 525 526 func (p *addrParser) peek() byte { 527 return p.s[0] 528 } 529 530 func (p *addrParser) empty() bool { 531 return p.len() == 0 532 } 533 534 func (p *addrParser) len() int { 535 return len(p.s) 536 } 537 538 func (p *addrParser) decodeRFC2047Word(s string) (string, error) { 539 if p.dec != nil { 540 return p.dec.DecodeHeader(s) 541 } 542 543 dec, err := rfc2047Decoder.Decode(s) 544 if err == nil { 545 return dec, nil 546 } 547 548 if _, ok := err.(charsetError); ok { 549 return s, err 550 } 551 552 // Ignore invalid RFC 2047 encoded-word errors. 553 return s, nil 554 } 555 556 var rfc2047Decoder = mime.WordDecoder{ 557 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { 558 return nil, charsetError(charset) 559 }, 560 } 561 562 type charsetError string 563 564 func (e charsetError) Error() string { 565 return fmt.Sprintf("charset not supported: %q", string(e)) 566 } 567 568 // isAtext reports whether r is an RFC 5322 atext character. 569 // If dot is true, period is included. 570 func isAtext(r rune, dot bool) bool { 571 switch r { 572 case '.': 573 return dot 574 575 case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials 576 return false 577 } 578 return isVchar(r) 579 } 580 581 // isQtext reports whether r is an RFC 5322 qtext character. 582 func isQtext(r rune) bool { 583 // Printable US-ASCII, excluding backslash or quote. 584 if r == '\\' || r == '"' { 585 return false 586 } 587 return isVchar(r) 588 } 589 590 // quoteString renders a string as an RFC 5322 quoted-string. 591 func quoteString(s string) string { 592 var buf bytes.Buffer 593 buf.WriteByte('"') 594 for _, r := range s { 595 if isQtext(r) || isWSP(r) { 596 buf.WriteRune(r) 597 } else if isVchar(r) { 598 buf.WriteByte('\\') 599 buf.WriteRune(r) 600 } 601 } 602 buf.WriteByte('"') 603 return buf.String() 604 } 605 606 // isVchar reports whether r is an RFC 5322 VCHAR character. 607 func isVchar(r rune) bool { 608 // Visible (printing) characters. 609 return '!' <= r && r <= '~' || isMultibyte(r) 610 } 611 612 // isMultibyte reports whether r is a multi-byte UTF-8 character 613 // as supported by RFC 6532 614 func isMultibyte(r rune) bool { 615 return r >= utf8.RuneSelf 616 } 617 618 // isWSP reports whether r is a WSP (white space). 619 // WSP is a space or horizontal tab (RFC 5234 Appendix B). 620 func isWSP(r rune) bool { 621 return r == ' ' || r == '\t' 622 }