github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322. 9 Notable divergences: 10 * Obsolete address formats are not parsed, including addresses with 11 embedded route information. 12 * Group addresses are not parsed. 13 * The full range of spacing (the CFWS syntax element) is not supported, 14 such as breaking addresses across lines. 15 */ 16 package mail 17 18 import ( 19 "bufio" 20 "bytes" 21 "errors" 22 "fmt" 23 "io" 24 "log" 25 "mime" 26 "net/textproto" 27 "strings" 28 "time" 29 ) 30 31 var debug = debugT(false) 32 33 type debugT bool 34 35 func (d debugT) Printf(format string, args ...interface{}) { 36 if d { 37 log.Printf(format, args...) 38 } 39 } 40 41 // A Message represents a parsed mail message. 42 type Message struct { 43 Header Header 44 Body io.Reader 45 } 46 47 // ReadMessage reads a message from r. 48 // The headers are parsed, and the body of the message will be available 49 // for reading from r. 50 func ReadMessage(r io.Reader) (msg *Message, err error) { 51 tp := textproto.NewReader(bufio.NewReader(r)) 52 53 hdr, err := tp.ReadMIMEHeader() 54 if err != nil { 55 return nil, err 56 } 57 58 return &Message{ 59 Header: Header(hdr), 60 Body: tp.R, 61 }, nil 62 } 63 64 // Layouts suitable for passing to time.Parse. 65 // These are tried in order. 66 var dateLayouts []string 67 68 func init() { 69 // Generate layouts based on RFC 5322, section 3.3. 70 71 dows := [...]string{"", "Mon, "} // day-of-week 72 days := [...]string{"2", "02"} // day = 1*2DIGIT 73 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 74 seconds := [...]string{":05", ""} // second 75 // "-0700 (MST)" is not in RFC 5322, but is common. 76 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 77 78 for _, dow := range dows { 79 for _, day := range days { 80 for _, year := range years { 81 for _, second := range seconds { 82 for _, zone := range zones { 83 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 84 dateLayouts = append(dateLayouts, s) 85 } 86 } 87 } 88 } 89 } 90 } 91 92 func parseDate(date string) (time.Time, error) { 93 for _, layout := range dateLayouts { 94 t, err := time.Parse(layout, date) 95 if err == nil { 96 return t, nil 97 } 98 } 99 return time.Time{}, errors.New("mail: header could not be parsed") 100 } 101 102 // A Header represents the key-value pairs in a mail message header. 103 type Header map[string][]string 104 105 // Get gets the first value associated with the given key. 106 // If there are no values associated with the key, Get returns "". 107 func (h Header) Get(key string) string { 108 return textproto.MIMEHeader(h).Get(key) 109 } 110 111 var ErrHeaderNotPresent = errors.New("mail: header not in message") 112 113 // Date parses the Date header field. 114 func (h Header) Date() (time.Time, error) { 115 hdr := h.Get("Date") 116 if hdr == "" { 117 return time.Time{}, ErrHeaderNotPresent 118 } 119 return parseDate(hdr) 120 } 121 122 // AddressList parses the named header field as a list of addresses. 123 func (h Header) AddressList(key string) ([]*Address, error) { 124 hdr := h.Get(key) 125 if hdr == "" { 126 return nil, ErrHeaderNotPresent 127 } 128 return ParseAddressList(hdr) 129 } 130 131 // Address represents a single mail address. 132 // An address such as "Barry Gibbs <bg@example.com>" is represented 133 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 134 type Address struct { 135 Name string // Proper name; may be empty. 136 Address string // user@domain 137 } 138 139 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 140 func ParseAddress(address string) (*Address, error) { 141 return (&addrParser{s: address}).parseAddress() 142 } 143 144 // ParseAddressList parses the given string as a list of addresses. 145 func ParseAddressList(list string) ([]*Address, error) { 146 return (&addrParser{s: list}).parseAddressList() 147 } 148 149 // An AddressParser is an RFC 5322 address parser. 150 type AddressParser struct { 151 // WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. 152 WordDecoder *mime.WordDecoder 153 } 154 155 // Parse parses a single RFC 5322 address of the 156 // form "Gogh Fir <gf@example.com>" or "foo@example.com". 157 func (p *AddressParser) Parse(address string) (*Address, error) { 158 return (&addrParser{s: address, dec: p.WordDecoder}).parseAddress() 159 } 160 161 // ParseList parses the given string as a list of comma-separated addresses 162 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com". 163 func (p *AddressParser) ParseList(list string) ([]*Address, error) { 164 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() 165 } 166 167 // String formats the address as a valid RFC 5322 address. 168 // If the address's name contains non-ASCII characters 169 // the name will be rendered according to RFC 2047. 170 func (a *Address) String() string { 171 172 // Format address local@domain 173 at := strings.LastIndex(a.Address, "@") 174 var local, domain string 175 if at < 0 { 176 // This is a malformed address ("@" is required in addr-spec); 177 // treat the whole address as local-part. 178 local = a.Address 179 } else { 180 local, domain = a.Address[:at], a.Address[at+1:] 181 } 182 183 // Add quotes if needed 184 // TODO: rendering quoted local part and rendering printable name 185 // should be merged in helper function. 186 quoteLocal := false 187 for i := 0; i < len(local); i++ { 188 ch := local[i] 189 if isAtext(ch, false) { 190 continue 191 } 192 if ch == '.' { 193 // Dots are okay if they are surrounded by atext. 194 // We only need to check that the previous byte is 195 // not a dot, and this isn't the end of the string. 196 if i > 0 && local[i-1] != '.' && i < len(local)-1 { 197 continue 198 } 199 } 200 quoteLocal = true 201 break 202 } 203 if quoteLocal { 204 local = quoteString(local) 205 206 } 207 208 s := "<" + local + "@" + domain + ">" 209 210 if a.Name == "" { 211 return s 212 } 213 214 // If every character is printable ASCII, quoting is simple. 215 allPrintable := true 216 for i := 0; i < len(a.Name); i++ { 217 // isWSP here should actually be isFWS, 218 // but we don't support folding yet. 219 if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) { 220 allPrintable = false 221 break 222 } 223 } 224 if allPrintable { 225 b := bytes.NewBufferString(`"`) 226 for i := 0; i < len(a.Name); i++ { 227 if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) { 228 b.WriteByte('\\') 229 } 230 b.WriteByte(a.Name[i]) 231 } 232 b.WriteString(`" `) 233 b.WriteString(s) 234 return b.String() 235 } 236 237 // Text in an encoded-word in a display-name must not contain certain 238 // characters like quotes or parentheses (see RFC 2047 section 5.3). 239 // When this is the case encode the name using base64 encoding. 240 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { 241 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s 242 } 243 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s 244 } 245 246 type addrParser struct { 247 s string 248 dec *mime.WordDecoder // may be nil 249 } 250 251 func (p *addrParser) parseAddressList() ([]*Address, error) { 252 var list []*Address 253 for { 254 p.skipSpace() 255 addr, err := p.parseAddress() 256 if err != nil { 257 return nil, err 258 } 259 list = append(list, addr) 260 261 p.skipSpace() 262 if p.empty() { 263 break 264 } 265 if !p.consume(',') { 266 return nil, errors.New("mail: expected comma") 267 } 268 } 269 return list, nil 270 } 271 272 // parseAddress parses a single RFC 5322 address at the start of p. 273 func (p *addrParser) parseAddress() (addr *Address, err error) { 274 debug.Printf("parseAddress: %q", p.s) 275 p.skipSpace() 276 if p.empty() { 277 return nil, errors.New("mail: no address") 278 } 279 280 // address = name-addr / addr-spec 281 // TODO(dsymonds): Support parsing group address. 282 283 // addr-spec has a more restricted grammar than name-addr, 284 // so try parsing it first, and fallback to name-addr. 285 // TODO(dsymonds): Is this really correct? 286 spec, err := p.consumeAddrSpec() 287 if err == nil { 288 return &Address{ 289 Address: spec, 290 }, err 291 } 292 debug.Printf("parseAddress: not an addr-spec: %v", err) 293 debug.Printf("parseAddress: state is now %q", p.s) 294 295 // display-name 296 var displayName string 297 if p.peek() != '<' { 298 displayName, err = p.consumePhrase() 299 if err != nil { 300 return nil, err 301 } 302 } 303 debug.Printf("parseAddress: displayName=%q", displayName) 304 305 // angle-addr = "<" addr-spec ">" 306 p.skipSpace() 307 if !p.consume('<') { 308 return nil, errors.New("mail: no angle-addr") 309 } 310 spec, err = p.consumeAddrSpec() 311 if err != nil { 312 return nil, err 313 } 314 if !p.consume('>') { 315 return nil, errors.New("mail: unclosed angle-addr") 316 } 317 debug.Printf("parseAddress: spec=%q", spec) 318 319 return &Address{ 320 Name: displayName, 321 Address: spec, 322 }, nil 323 } 324 325 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 326 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 327 debug.Printf("consumeAddrSpec: %q", p.s) 328 329 orig := *p 330 defer func() { 331 if err != nil { 332 *p = orig 333 } 334 }() 335 336 // local-part = dot-atom / quoted-string 337 var localPart string 338 p.skipSpace() 339 if p.empty() { 340 return "", errors.New("mail: no addr-spec") 341 } 342 if p.peek() == '"' { 343 // quoted-string 344 debug.Printf("consumeAddrSpec: parsing quoted-string") 345 localPart, err = p.consumeQuotedString() 346 } else { 347 // dot-atom 348 debug.Printf("consumeAddrSpec: parsing dot-atom") 349 localPart, err = p.consumeAtom(true, false) 350 } 351 if err != nil { 352 debug.Printf("consumeAddrSpec: failed: %v", err) 353 return "", err 354 } 355 356 if !p.consume('@') { 357 return "", errors.New("mail: missing @ in addr-spec") 358 } 359 360 // domain = dot-atom / domain-literal 361 var domain string 362 p.skipSpace() 363 if p.empty() { 364 return "", errors.New("mail: no domain in addr-spec") 365 } 366 // TODO(dsymonds): Handle domain-literal 367 domain, err = p.consumeAtom(true, false) 368 if err != nil { 369 return "", err 370 } 371 372 return localPart + "@" + domain, nil 373 } 374 375 // consumePhrase parses the RFC 5322 phrase at the start of p. 376 func (p *addrParser) consumePhrase() (phrase string, err error) { 377 debug.Printf("consumePhrase: [%s]", p.s) 378 // phrase = 1*word 379 var words []string 380 for { 381 // word = atom / quoted-string 382 var word string 383 p.skipSpace() 384 if p.empty() { 385 return "", errors.New("mail: missing phrase") 386 } 387 if p.peek() == '"' { 388 // quoted-string 389 word, err = p.consumeQuotedString() 390 } else { 391 // atom 392 // We actually parse dot-atom here to be more permissive 393 // than what RFC 5322 specifies. 394 word, err = p.consumeAtom(true, true) 395 if err == nil { 396 word, err = p.decodeRFC2047Word(word) 397 } 398 } 399 400 if err != nil { 401 break 402 } 403 debug.Printf("consumePhrase: consumed %q", word) 404 words = append(words, word) 405 } 406 // Ignore any error if we got at least one word. 407 if err != nil && len(words) == 0 { 408 debug.Printf("consumePhrase: hit err: %v", err) 409 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 410 } 411 phrase = strings.Join(words, " ") 412 return phrase, nil 413 } 414 415 // consumeQuotedString parses the quoted string at the start of p. 416 func (p *addrParser) consumeQuotedString() (qs string, err error) { 417 // Assume first byte is '"'. 418 i := 1 419 qsb := make([]byte, 0, 10) 420 Loop: 421 for { 422 if i >= p.len() { 423 return "", errors.New("mail: unclosed quoted-string") 424 } 425 switch c := p.s[i]; { 426 case c == '"': 427 break Loop 428 case c == '\\': 429 if i+1 == p.len() { 430 return "", errors.New("mail: unclosed quoted-string") 431 } 432 qsb = append(qsb, p.s[i+1]) 433 i += 2 434 case isQtext(c), c == ' ': 435 // qtext (printable US-ASCII excluding " and \), or 436 // FWS (almost; we're ignoring CRLF) 437 qsb = append(qsb, c) 438 i++ 439 default: 440 return "", fmt.Errorf("mail: bad character in quoted-string: %q", c) 441 } 442 } 443 p.s = p.s[i+1:] 444 if len(qsb) == 0 { 445 return "", errors.New("mail: empty quoted-string") 446 } 447 return string(qsb), nil 448 } 449 450 var errNonASCII = errors.New("mail: unencoded non-ASCII text in address") 451 452 // consumeAtom parses an RFC 5322 atom at the start of p. 453 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 454 // If permissive is true, consumeAtom will not fail on 455 // leading/trailing/double dots in the atom (see golang.org/issue/4938). 456 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { 457 if c := p.peek(); !isAtext(c, false) { 458 if c > 127 { 459 return "", errNonASCII 460 } 461 return "", errors.New("mail: invalid string") 462 } 463 i := 1 464 for ; i < p.len() && isAtext(p.s[i], dot); i++ { 465 } 466 if i < p.len() && p.s[i] > 127 { 467 return "", errNonASCII 468 } 469 atom, p.s = string(p.s[:i]), p.s[i:] 470 if !permissive { 471 if strings.HasPrefix(atom, ".") { 472 return "", errors.New("mail: leading dot in atom") 473 } 474 if strings.Contains(atom, "..") { 475 return "", errors.New("mail: double dot in atom") 476 } 477 if strings.HasSuffix(atom, ".") { 478 return "", errors.New("mail: trailing dot in atom") 479 } 480 } 481 return atom, nil 482 } 483 484 func (p *addrParser) consume(c byte) bool { 485 if p.empty() || p.peek() != c { 486 return false 487 } 488 p.s = p.s[1:] 489 return true 490 } 491 492 // skipSpace skips the leading space and tab characters. 493 func (p *addrParser) skipSpace() { 494 p.s = strings.TrimLeft(p.s, " \t") 495 } 496 497 func (p *addrParser) peek() byte { 498 return p.s[0] 499 } 500 501 func (p *addrParser) empty() bool { 502 return p.len() == 0 503 } 504 505 func (p *addrParser) len() int { 506 return len(p.s) 507 } 508 509 func (p *addrParser) decodeRFC2047Word(s string) (string, error) { 510 if p.dec != nil { 511 return p.dec.DecodeHeader(s) 512 } 513 514 dec, err := rfc2047Decoder.Decode(s) 515 if err == nil { 516 return dec, nil 517 } 518 519 if _, ok := err.(charsetError); ok { 520 return s, err 521 } 522 523 // Ignore invalid RFC 2047 encoded-word errors. 524 return s, nil 525 } 526 527 var rfc2047Decoder = mime.WordDecoder{ 528 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { 529 return nil, charsetError(charset) 530 }, 531 } 532 533 type charsetError string 534 535 func (e charsetError) Error() string { 536 return fmt.Sprintf("charset not supported: %q", string(e)) 537 } 538 539 var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 540 "abcdefghijklmnopqrstuvwxyz" + 541 "0123456789" + 542 "!#$%&'*+-/=?^_`{|}~") 543 544 // isAtext reports whether c is an RFC 5322 atext character. 545 // If dot is true, period is included. 546 func isAtext(c byte, dot bool) bool { 547 if dot && c == '.' { 548 return true 549 } 550 return bytes.IndexByte(atextChars, c) >= 0 551 } 552 553 // isQtext reports whether c is an RFC 5322 qtext character. 554 func isQtext(c byte) bool { 555 // Printable US-ASCII, excluding backslash or quote. 556 if c == '\\' || c == '"' { 557 return false 558 } 559 return '!' <= c && c <= '~' 560 } 561 562 // quoteString renders a string as a RFC5322 quoted-string. 563 func quoteString(s string) string { 564 var buf bytes.Buffer 565 buf.WriteByte('"') 566 for _, c := range s { 567 ch := byte(c) 568 if isQtext(ch) || isWSP(ch) { 569 buf.WriteByte(ch) 570 } else if isVchar(ch) { 571 buf.WriteByte('\\') 572 buf.WriteByte(ch) 573 } 574 } 575 buf.WriteByte('"') 576 return buf.String() 577 } 578 579 // isVchar reports whether c is an RFC 5322 VCHAR character. 580 func isVchar(c byte) bool { 581 // Visible (printing) characters. 582 return '!' <= c && c <= '~' 583 } 584 585 // isWSP reports whether c is a WSP (white space). 586 // WSP is a space or horizontal tab (RFC5234 Appendix B). 587 func isWSP(c byte) bool { 588 return c == ' ' || c == '\t' 589 }