github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322. 9 Notable divergences: 10 * Obsolete address formats are not parsed, including addresses with 11 embedded route information. 12 * Group addresses are not parsed. 13 * The full range of spacing (the CFWS syntax element) is not supported, 14 such as breaking addresses across lines. 15 */ 16 package mail 17 18 import ( 19 "bufio" 20 "bytes" 21 "errors" 22 "fmt" 23 "io" 24 "log" 25 "mime" 26 "net/textproto" 27 "strings" 28 "time" 29 ) 30 31 var debug = debugT(false) 32 33 type debugT bool 34 35 func (d debugT) Printf(format string, args ...interface{}) { 36 if d { 37 log.Printf(format, args...) 38 } 39 } 40 41 // A Message represents a parsed mail message. 42 type Message struct { 43 Header Header 44 Body io.Reader 45 } 46 47 // ReadMessage reads a message from r. 48 // The headers are parsed, and the body of the message will be available 49 // for reading from r. 50 func ReadMessage(r io.Reader) (msg *Message, err error) { 51 tp := textproto.NewReader(bufio.NewReader(r)) 52 53 hdr, err := tp.ReadMIMEHeader() 54 if err != nil { 55 return nil, err 56 } 57 58 return &Message{ 59 Header: Header(hdr), 60 Body: tp.R, 61 }, nil 62 } 63 64 // Layouts suitable for passing to time.Parse. 65 // These are tried in order. 66 var dateLayouts []string 67 68 func init() { 69 // Generate layouts based on RFC 5322, section 3.3. 70 71 dows := [...]string{"", "Mon, "} // day-of-week 72 days := [...]string{"2", "02"} // day = 1*2DIGIT 73 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 74 seconds := [...]string{":05", ""} // second 75 // "-0700 (MST)" is not in RFC 5322, but is common. 76 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 77 78 for _, dow := range dows { 79 for _, day := range days { 80 for _, year := range years { 81 for _, second := range seconds { 82 for _, zone := range zones { 83 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 84 dateLayouts = append(dateLayouts, s) 85 } 86 } 87 } 88 } 89 } 90 } 91 92 func parseDate(date string) (time.Time, error) { 93 for _, layout := range dateLayouts { 94 t, err := time.Parse(layout, date) 95 if err == nil { 96 return t, nil 97 } 98 } 99 return time.Time{}, errors.New("mail: header could not be parsed") 100 } 101 102 // A Header represents the key-value pairs in a mail message header. 103 type Header map[string][]string 104 105 // Get gets the first value associated with the given key. 106 // If there are no values associated with the key, Get returns "". 107 func (h Header) Get(key string) string { 108 return textproto.MIMEHeader(h).Get(key) 109 } 110 111 var ErrHeaderNotPresent = errors.New("mail: header not in message") 112 113 // Date parses the Date header field. 114 func (h Header) Date() (time.Time, error) { 115 hdr := h.Get("Date") 116 if hdr == "" { 117 return time.Time{}, ErrHeaderNotPresent 118 } 119 return parseDate(hdr) 120 } 121 122 // AddressList parses the named header field as a list of addresses. 123 func (h Header) AddressList(key string) ([]*Address, error) { 124 hdr := h.Get(key) 125 if hdr == "" { 126 return nil, ErrHeaderNotPresent 127 } 128 return ParseAddressList(hdr) 129 } 130 131 // Address represents a single mail address. 132 // An address such as "Barry Gibbs <bg@example.com>" is represented 133 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 134 type Address struct { 135 Name string // Proper name; may be empty. 136 Address string // user@domain 137 } 138 139 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 140 func ParseAddress(address string) (*Address, error) { 141 return (&addrParser{s: address}).parseSingleAddress() 142 } 143 144 // ParseAddressList parses the given string as a list of addresses. 145 func ParseAddressList(list string) ([]*Address, error) { 146 return (&addrParser{s: list}).parseAddressList() 147 } 148 149 // An AddressParser is an RFC 5322 address parser. 150 type AddressParser struct { 151 // WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. 152 WordDecoder *mime.WordDecoder 153 } 154 155 // Parse parses a single RFC 5322 address of the 156 // form "Gogh Fir <gf@example.com>" or "foo@example.com". 157 func (p *AddressParser) Parse(address string) (*Address, error) { 158 return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() 159 } 160 161 // ParseList parses the given string as a list of comma-separated addresses 162 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com". 163 func (p *AddressParser) ParseList(list string) ([]*Address, error) { 164 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() 165 } 166 167 // String formats the address as a valid RFC 5322 address. 168 // If the address's name contains non-ASCII characters 169 // the name will be rendered according to RFC 2047. 170 func (a *Address) String() string { 171 // Format address local@domain 172 at := strings.LastIndex(a.Address, "@") 173 var local, domain string 174 if at < 0 { 175 // This is a malformed address ("@" is required in addr-spec); 176 // treat the whole address as local-part. 177 local = a.Address 178 } else { 179 local, domain = a.Address[:at], a.Address[at+1:] 180 } 181 182 // Add quotes if needed 183 // TODO: rendering quoted local part and rendering printable name 184 // should be merged in helper function. 185 quoteLocal := false 186 for i := 0; i < len(local); i++ { 187 ch := local[i] 188 if isAtext(ch, false) { 189 continue 190 } 191 if ch == '.' { 192 // Dots are okay if they are surrounded by atext. 193 // We only need to check that the previous byte is 194 // not a dot, and this isn't the end of the string. 195 if i > 0 && local[i-1] != '.' && i < len(local)-1 { 196 continue 197 } 198 } 199 quoteLocal = true 200 break 201 } 202 if quoteLocal { 203 local = quoteString(local) 204 205 } 206 207 s := "<" + local + "@" + domain + ">" 208 209 if a.Name == "" { 210 return s 211 } 212 213 // If every character is printable ASCII, quoting is simple. 214 allPrintable := true 215 for i := 0; i < len(a.Name); i++ { 216 // isWSP here should actually be isFWS, 217 // but we don't support folding yet. 218 if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) { 219 allPrintable = false 220 break 221 } 222 } 223 if allPrintable { 224 b := bytes.NewBufferString(`"`) 225 for i := 0; i < len(a.Name); i++ { 226 if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) { 227 b.WriteByte('\\') 228 } 229 b.WriteByte(a.Name[i]) 230 } 231 b.WriteString(`" `) 232 b.WriteString(s) 233 return b.String() 234 } 235 236 // Text in an encoded-word in a display-name must not contain certain 237 // characters like quotes or parentheses (see RFC 2047 section 5.3). 238 // When this is the case encode the name using base64 encoding. 239 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { 240 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s 241 } 242 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s 243 } 244 245 type addrParser struct { 246 s string 247 dec *mime.WordDecoder // may be nil 248 } 249 250 func (p *addrParser) parseAddressList() ([]*Address, error) { 251 var list []*Address 252 for { 253 p.skipSpace() 254 addr, err := p.parseAddress() 255 if err != nil { 256 return nil, err 257 } 258 list = append(list, addr) 259 260 p.skipSpace() 261 if p.empty() { 262 break 263 } 264 if !p.consume(',') { 265 return nil, errors.New("mail: expected comma") 266 } 267 } 268 return list, nil 269 } 270 271 func (p *addrParser) parseSingleAddress() (*Address, error) { 272 addr, err := p.parseAddress() 273 if err != nil { 274 return nil, err 275 } 276 p.skipSpace() 277 if !p.empty() { 278 return nil, fmt.Errorf("mail: expected single address, got %q", p.s) 279 } 280 return addr, nil 281 } 282 283 // parseAddress parses a single RFC 5322 address at the start of p. 284 func (p *addrParser) parseAddress() (addr *Address, err error) { 285 debug.Printf("parseAddress: %q", p.s) 286 p.skipSpace() 287 if p.empty() { 288 return nil, errors.New("mail: no address") 289 } 290 291 // address = name-addr / addr-spec 292 // TODO(dsymonds): Support parsing group address. 293 294 // addr-spec has a more restricted grammar than name-addr, 295 // so try parsing it first, and fallback to name-addr. 296 // TODO(dsymonds): Is this really correct? 297 spec, err := p.consumeAddrSpec() 298 if err == nil { 299 return &Address{ 300 Address: spec, 301 }, err 302 } 303 debug.Printf("parseAddress: not an addr-spec: %v", err) 304 debug.Printf("parseAddress: state is now %q", p.s) 305 306 // display-name 307 var displayName string 308 if p.peek() != '<' { 309 displayName, err = p.consumePhrase() 310 if err != nil { 311 return nil, err 312 } 313 } 314 debug.Printf("parseAddress: displayName=%q", displayName) 315 316 // angle-addr = "<" addr-spec ">" 317 p.skipSpace() 318 if !p.consume('<') { 319 return nil, errors.New("mail: no angle-addr") 320 } 321 spec, err = p.consumeAddrSpec() 322 if err != nil { 323 return nil, err 324 } 325 if !p.consume('>') { 326 return nil, errors.New("mail: unclosed angle-addr") 327 } 328 debug.Printf("parseAddress: spec=%q", spec) 329 330 return &Address{ 331 Name: displayName, 332 Address: spec, 333 }, nil 334 } 335 336 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 337 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 338 debug.Printf("consumeAddrSpec: %q", p.s) 339 340 orig := *p 341 defer func() { 342 if err != nil { 343 *p = orig 344 } 345 }() 346 347 // local-part = dot-atom / quoted-string 348 var localPart string 349 p.skipSpace() 350 if p.empty() { 351 return "", errors.New("mail: no addr-spec") 352 } 353 if p.peek() == '"' { 354 // quoted-string 355 debug.Printf("consumeAddrSpec: parsing quoted-string") 356 localPart, err = p.consumeQuotedString() 357 } else { 358 // dot-atom 359 debug.Printf("consumeAddrSpec: parsing dot-atom") 360 localPart, err = p.consumeAtom(true, false) 361 } 362 if err != nil { 363 debug.Printf("consumeAddrSpec: failed: %v", err) 364 return "", err 365 } 366 367 if !p.consume('@') { 368 return "", errors.New("mail: missing @ in addr-spec") 369 } 370 371 // domain = dot-atom / domain-literal 372 var domain string 373 p.skipSpace() 374 if p.empty() { 375 return "", errors.New("mail: no domain in addr-spec") 376 } 377 // TODO(dsymonds): Handle domain-literal 378 domain, err = p.consumeAtom(true, false) 379 if err != nil { 380 return "", err 381 } 382 383 return localPart + "@" + domain, nil 384 } 385 386 // consumePhrase parses the RFC 5322 phrase at the start of p. 387 func (p *addrParser) consumePhrase() (phrase string, err error) { 388 debug.Printf("consumePhrase: [%s]", p.s) 389 // phrase = 1*word 390 var words []string 391 for { 392 // word = atom / quoted-string 393 var word string 394 p.skipSpace() 395 if p.empty() { 396 return "", errors.New("mail: missing phrase") 397 } 398 if p.peek() == '"' { 399 // quoted-string 400 word, err = p.consumeQuotedString() 401 } else { 402 // atom 403 // We actually parse dot-atom here to be more permissive 404 // than what RFC 5322 specifies. 405 word, err = p.consumeAtom(true, true) 406 if err == nil { 407 word, err = p.decodeRFC2047Word(word) 408 } 409 } 410 411 if err != nil { 412 break 413 } 414 debug.Printf("consumePhrase: consumed %q", word) 415 words = append(words, word) 416 } 417 // Ignore any error if we got at least one word. 418 if err != nil && len(words) == 0 { 419 debug.Printf("consumePhrase: hit err: %v", err) 420 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 421 } 422 phrase = strings.Join(words, " ") 423 return phrase, nil 424 } 425 426 // consumeQuotedString parses the quoted string at the start of p. 427 func (p *addrParser) consumeQuotedString() (qs string, err error) { 428 // Assume first byte is '"'. 429 i := 1 430 qsb := make([]byte, 0, 10) 431 Loop: 432 for { 433 if i >= p.len() { 434 return "", errors.New("mail: unclosed quoted-string") 435 } 436 switch c := p.s[i]; { 437 case c == '"': 438 break Loop 439 case c == '\\': 440 if i+1 == p.len() { 441 return "", errors.New("mail: unclosed quoted-string") 442 } 443 qsb = append(qsb, p.s[i+1]) 444 i += 2 445 case isQtext(c), c == ' ': 446 // qtext (printable US-ASCII excluding " and \), or 447 // FWS (almost; we're ignoring CRLF) 448 qsb = append(qsb, c) 449 i++ 450 default: 451 return "", fmt.Errorf("mail: bad character in quoted-string: %q", c) 452 } 453 } 454 p.s = p.s[i+1:] 455 if len(qsb) == 0 { 456 return "", errors.New("mail: empty quoted-string") 457 } 458 return string(qsb), nil 459 } 460 461 var errNonASCII = errors.New("mail: unencoded non-ASCII text in address") 462 463 // consumeAtom parses an RFC 5322 atom at the start of p. 464 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 465 // If permissive is true, consumeAtom will not fail on 466 // leading/trailing/double dots in the atom (see golang.org/issue/4938). 467 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { 468 if c := p.peek(); !isAtext(c, false) { 469 if c > 127 { 470 return "", errNonASCII 471 } 472 return "", errors.New("mail: invalid string") 473 } 474 i := 1 475 for ; i < p.len() && isAtext(p.s[i], dot); i++ { 476 } 477 if i < p.len() && p.s[i] > 127 { 478 return "", errNonASCII 479 } 480 atom, p.s = string(p.s[:i]), p.s[i:] 481 if !permissive { 482 if strings.HasPrefix(atom, ".") { 483 return "", errors.New("mail: leading dot in atom") 484 } 485 if strings.Contains(atom, "..") { 486 return "", errors.New("mail: double dot in atom") 487 } 488 if strings.HasSuffix(atom, ".") { 489 return "", errors.New("mail: trailing dot in atom") 490 } 491 } 492 return atom, nil 493 } 494 495 func (p *addrParser) consume(c byte) bool { 496 if p.empty() || p.peek() != c { 497 return false 498 } 499 p.s = p.s[1:] 500 return true 501 } 502 503 // skipSpace skips the leading space and tab characters. 504 func (p *addrParser) skipSpace() { 505 p.s = strings.TrimLeft(p.s, " \t") 506 } 507 508 func (p *addrParser) peek() byte { 509 return p.s[0] 510 } 511 512 func (p *addrParser) empty() bool { 513 return p.len() == 0 514 } 515 516 func (p *addrParser) len() int { 517 return len(p.s) 518 } 519 520 func (p *addrParser) decodeRFC2047Word(s string) (string, error) { 521 if p.dec != nil { 522 return p.dec.DecodeHeader(s) 523 } 524 525 dec, err := rfc2047Decoder.Decode(s) 526 if err == nil { 527 return dec, nil 528 } 529 530 if _, ok := err.(charsetError); ok { 531 return s, err 532 } 533 534 // Ignore invalid RFC 2047 encoded-word errors. 535 return s, nil 536 } 537 538 var rfc2047Decoder = mime.WordDecoder{ 539 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { 540 return nil, charsetError(charset) 541 }, 542 } 543 544 type charsetError string 545 546 func (e charsetError) Error() string { 547 return fmt.Sprintf("charset not supported: %q", string(e)) 548 } 549 550 var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 551 "abcdefghijklmnopqrstuvwxyz" + 552 "0123456789" + 553 "!#$%&'*+-/=?^_`{|}~") 554 555 // isAtext reports whether c is an RFC 5322 atext character. 556 // If dot is true, period is included. 557 func isAtext(c byte, dot bool) bool { 558 if dot && c == '.' { 559 return true 560 } 561 return bytes.IndexByte(atextChars, c) >= 0 562 } 563 564 // isQtext reports whether c is an RFC 5322 qtext character. 565 func isQtext(c byte) bool { 566 // Printable US-ASCII, excluding backslash or quote. 567 if c == '\\' || c == '"' { 568 return false 569 } 570 return '!' <= c && c <= '~' 571 } 572 573 // quoteString renders a string as a RFC5322 quoted-string. 574 func quoteString(s string) string { 575 var buf bytes.Buffer 576 buf.WriteByte('"') 577 for _, c := range s { 578 ch := byte(c) 579 if isQtext(ch) || isWSP(ch) { 580 buf.WriteByte(ch) 581 } else if isVchar(ch) { 582 buf.WriteByte('\\') 583 buf.WriteByte(ch) 584 } 585 } 586 buf.WriteByte('"') 587 return buf.String() 588 } 589 590 // isVchar reports whether c is an RFC 5322 VCHAR character. 591 func isVchar(c byte) bool { 592 // Visible (printing) characters. 593 return '!' <= c && c <= '~' 594 } 595 596 // isWSP reports whether c is a WSP (white space). 597 // WSP is a space or horizontal tab (RFC5234 Appendix B). 598 func isWSP(c byte) bool { 599 return c == ' ' || c == '\t' 600 }