github.com/roboticscm/goman@v0.0.0-20210203095141-87c07b4a0a55/src/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322. 9 Notable divergences: 10 * Obsolete address formats are not parsed, including addresses with 11 embedded route information. 12 * Group addresses are not parsed. 13 * The full range of spacing (the CFWS syntax element) is not supported, 14 such as breaking addresses across lines. 15 */ 16 package mail 17 18 import ( 19 "bufio" 20 "bytes" 21 "encoding/base64" 22 "errors" 23 "fmt" 24 "io" 25 "io/ioutil" 26 "log" 27 "net/textproto" 28 "strconv" 29 "strings" 30 "time" 31 "unicode" 32 ) 33 34 var debug = debugT(false) 35 36 type debugT bool 37 38 func (d debugT) Printf(format string, args ...interface{}) { 39 if d { 40 log.Printf(format, args...) 41 } 42 } 43 44 // A Message represents a parsed mail message. 45 type Message struct { 46 Header Header 47 Body io.Reader 48 } 49 50 // ReadMessage reads a message from r. 51 // The headers are parsed, and the body of the message will be available 52 // for reading from r. 53 func ReadMessage(r io.Reader) (msg *Message, err error) { 54 tp := textproto.NewReader(bufio.NewReader(r)) 55 56 hdr, err := tp.ReadMIMEHeader() 57 if err != nil { 58 return nil, err 59 } 60 61 return &Message{ 62 Header: Header(hdr), 63 Body: tp.R, 64 }, nil 65 } 66 67 // Layouts suitable for passing to time.Parse. 68 // These are tried in order. 69 var dateLayouts []string 70 71 func init() { 72 // Generate layouts based on RFC 5322, section 3.3. 73 74 dows := [...]string{"", "Mon, "} // day-of-week 75 days := [...]string{"2", "02"} // day = 1*2DIGIT 76 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 77 seconds := [...]string{":05", ""} // second 78 // "-0700 (MST)" is not in RFC 5322, but is common. 79 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 80 81 for _, dow := range dows { 82 for _, day := range days { 83 for _, year := range years { 84 for _, second := range seconds { 85 for _, zone := range zones { 86 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 87 dateLayouts = append(dateLayouts, s) 88 } 89 } 90 } 91 } 92 } 93 } 94 95 func parseDate(date string) (time.Time, error) { 96 for _, layout := range dateLayouts { 97 t, err := time.Parse(layout, date) 98 if err == nil { 99 return t, nil 100 } 101 } 102 return time.Time{}, errors.New("mail: header could not be parsed") 103 } 104 105 // A Header represents the key-value pairs in a mail message header. 106 type Header map[string][]string 107 108 // Get gets the first value associated with the given key. 109 // If there are no values associated with the key, Get returns "". 110 func (h Header) Get(key string) string { 111 return textproto.MIMEHeader(h).Get(key) 112 } 113 114 var ErrHeaderNotPresent = errors.New("mail: header not in message") 115 116 // Date parses the Date header field. 117 func (h Header) Date() (time.Time, error) { 118 hdr := h.Get("Date") 119 if hdr == "" { 120 return time.Time{}, ErrHeaderNotPresent 121 } 122 return parseDate(hdr) 123 } 124 125 // AddressList parses the named header field as a list of addresses. 126 func (h Header) AddressList(key string) ([]*Address, error) { 127 hdr := h.Get(key) 128 if hdr == "" { 129 return nil, ErrHeaderNotPresent 130 } 131 return ParseAddressList(hdr) 132 } 133 134 // Address represents a single mail address. 135 // An address such as "Barry Gibbs <bg@example.com>" is represented 136 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 137 type Address struct { 138 Name string // Proper name; may be empty. 139 Address string // user@domain 140 } 141 142 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 143 func ParseAddress(address string) (*Address, error) { 144 return newAddrParser(address).parseAddress() 145 } 146 147 // ParseAddressList parses the given string as a list of addresses. 148 func ParseAddressList(list string) ([]*Address, error) { 149 return newAddrParser(list).parseAddressList() 150 } 151 152 // String formats the address as a valid RFC 5322 address. 153 // If the address's name contains non-ASCII characters 154 // the name will be rendered according to RFC 2047. 155 func (a *Address) String() string { 156 s := "<" + a.Address + ">" 157 if a.Name == "" { 158 return s 159 } 160 // If every character is printable ASCII, quoting is simple. 161 allPrintable := true 162 for i := 0; i < len(a.Name); i++ { 163 // isWSP here should actually be isFWS, 164 // but we don't support folding yet. 165 if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) { 166 allPrintable = false 167 break 168 } 169 } 170 if allPrintable { 171 b := bytes.NewBufferString(`"`) 172 for i := 0; i < len(a.Name); i++ { 173 if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) { 174 b.WriteByte('\\') 175 } 176 b.WriteByte(a.Name[i]) 177 } 178 b.WriteString(`" `) 179 b.WriteString(s) 180 return b.String() 181 } 182 183 // UTF-8 "Q" encoding 184 b := bytes.NewBufferString("=?utf-8?q?") 185 for i := 0; i < len(a.Name); i++ { 186 switch c := a.Name[i]; { 187 case c == ' ': 188 b.WriteByte('_') 189 case isVchar(c) && c != '=' && c != '?' && c != '_': 190 b.WriteByte(c) 191 default: 192 fmt.Fprintf(b, "=%02X", c) 193 } 194 } 195 b.WriteString("?= ") 196 b.WriteString(s) 197 return b.String() 198 } 199 200 type addrParser []byte 201 202 func newAddrParser(s string) *addrParser { 203 p := addrParser(s) 204 return &p 205 } 206 207 func (p *addrParser) parseAddressList() ([]*Address, error) { 208 var list []*Address 209 for { 210 p.skipSpace() 211 addr, err := p.parseAddress() 212 if err != nil { 213 return nil, err 214 } 215 list = append(list, addr) 216 217 p.skipSpace() 218 if p.empty() { 219 break 220 } 221 if !p.consume(',') { 222 return nil, errors.New("mail: expected comma") 223 } 224 } 225 return list, nil 226 } 227 228 // parseAddress parses a single RFC 5322 address at the start of p. 229 func (p *addrParser) parseAddress() (addr *Address, err error) { 230 debug.Printf("parseAddress: %q", *p) 231 p.skipSpace() 232 if p.empty() { 233 return nil, errors.New("mail: no address") 234 } 235 236 // address = name-addr / addr-spec 237 // TODO(dsymonds): Support parsing group address. 238 239 // addr-spec has a more restricted grammar than name-addr, 240 // so try parsing it first, and fallback to name-addr. 241 // TODO(dsymonds): Is this really correct? 242 spec, err := p.consumeAddrSpec() 243 if err == nil { 244 return &Address{ 245 Address: spec, 246 }, err 247 } 248 debug.Printf("parseAddress: not an addr-spec: %v", err) 249 debug.Printf("parseAddress: state is now %q", *p) 250 251 // display-name 252 var displayName string 253 if p.peek() != '<' { 254 displayName, err = p.consumePhrase() 255 if err != nil { 256 return nil, err 257 } 258 } 259 debug.Printf("parseAddress: displayName=%q", displayName) 260 261 // angle-addr = "<" addr-spec ">" 262 p.skipSpace() 263 if !p.consume('<') { 264 return nil, errors.New("mail: no angle-addr") 265 } 266 spec, err = p.consumeAddrSpec() 267 if err != nil { 268 return nil, err 269 } 270 if !p.consume('>') { 271 return nil, errors.New("mail: unclosed angle-addr") 272 } 273 debug.Printf("parseAddress: spec=%q", spec) 274 275 return &Address{ 276 Name: displayName, 277 Address: spec, 278 }, nil 279 } 280 281 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 282 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 283 debug.Printf("consumeAddrSpec: %q", *p) 284 285 orig := *p 286 defer func() { 287 if err != nil { 288 *p = orig 289 } 290 }() 291 292 // local-part = dot-atom / quoted-string 293 var localPart string 294 p.skipSpace() 295 if p.empty() { 296 return "", errors.New("mail: no addr-spec") 297 } 298 if p.peek() == '"' { 299 // quoted-string 300 debug.Printf("consumeAddrSpec: parsing quoted-string") 301 localPart, err = p.consumeQuotedString() 302 } else { 303 // dot-atom 304 debug.Printf("consumeAddrSpec: parsing dot-atom") 305 localPart, err = p.consumeAtom(true) 306 } 307 if err != nil { 308 debug.Printf("consumeAddrSpec: failed: %v", err) 309 return "", err 310 } 311 312 if !p.consume('@') { 313 return "", errors.New("mail: missing @ in addr-spec") 314 } 315 316 // domain = dot-atom / domain-literal 317 var domain string 318 p.skipSpace() 319 if p.empty() { 320 return "", errors.New("mail: no domain in addr-spec") 321 } 322 // TODO(dsymonds): Handle domain-literal 323 domain, err = p.consumeAtom(true) 324 if err != nil { 325 return "", err 326 } 327 328 return localPart + "@" + domain, nil 329 } 330 331 // consumePhrase parses the RFC 5322 phrase at the start of p. 332 func (p *addrParser) consumePhrase() (phrase string, err error) { 333 debug.Printf("consumePhrase: [%s]", *p) 334 // phrase = 1*word 335 var words []string 336 for { 337 // word = atom / quoted-string 338 var word string 339 p.skipSpace() 340 if p.empty() { 341 return "", errors.New("mail: missing phrase") 342 } 343 if p.peek() == '"' { 344 // quoted-string 345 word, err = p.consumeQuotedString() 346 } else { 347 // atom 348 // We actually parse dot-atom here to be more permissive 349 // than what RFC 5322 specifies. 350 word, err = p.consumeAtom(true) 351 } 352 353 // RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s. 354 if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 { 355 word, err = decodeRFC2047Word(word) 356 } 357 358 if err != nil { 359 break 360 } 361 debug.Printf("consumePhrase: consumed %q", word) 362 words = append(words, word) 363 } 364 // Ignore any error if we got at least one word. 365 if err != nil && len(words) == 0 { 366 debug.Printf("consumePhrase: hit err: %v", err) 367 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 368 } 369 phrase = strings.Join(words, " ") 370 return phrase, nil 371 } 372 373 // consumeQuotedString parses the quoted string at the start of p. 374 func (p *addrParser) consumeQuotedString() (qs string, err error) { 375 // Assume first byte is '"'. 376 i := 1 377 qsb := make([]byte, 0, 10) 378 Loop: 379 for { 380 if i >= p.len() { 381 return "", errors.New("mail: unclosed quoted-string") 382 } 383 switch c := (*p)[i]; { 384 case c == '"': 385 break Loop 386 case c == '\\': 387 if i+1 == p.len() { 388 return "", errors.New("mail: unclosed quoted-string") 389 } 390 qsb = append(qsb, (*p)[i+1]) 391 i += 2 392 case isQtext(c), c == ' ' || c == '\t': 393 // qtext (printable US-ASCII excluding " and \), or 394 // FWS (almost; we're ignoring CRLF) 395 qsb = append(qsb, c) 396 i++ 397 default: 398 return "", fmt.Errorf("mail: bad character in quoted-string: %q", c) 399 } 400 } 401 *p = (*p)[i+1:] 402 return string(qsb), nil 403 } 404 405 // consumeAtom parses an RFC 5322 atom at the start of p. 406 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 407 func (p *addrParser) consumeAtom(dot bool) (atom string, err error) { 408 if !isAtext(p.peek(), false) { 409 return "", errors.New("mail: invalid string") 410 } 411 i := 1 412 for ; i < p.len() && isAtext((*p)[i], dot); i++ { 413 } 414 atom, *p = string((*p)[:i]), (*p)[i:] 415 return atom, nil 416 } 417 418 func (p *addrParser) consume(c byte) bool { 419 if p.empty() || p.peek() != c { 420 return false 421 } 422 *p = (*p)[1:] 423 return true 424 } 425 426 // skipSpace skips the leading space and tab characters. 427 func (p *addrParser) skipSpace() { 428 *p = bytes.TrimLeft(*p, " \t") 429 } 430 431 func (p *addrParser) peek() byte { 432 return (*p)[0] 433 } 434 435 func (p *addrParser) empty() bool { 436 return p.len() == 0 437 } 438 439 func (p *addrParser) len() int { 440 return len(*p) 441 } 442 443 func decodeRFC2047Word(s string) (string, error) { 444 fields := strings.Split(s, "?") 445 if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" { 446 return "", errors.New("address not RFC 2047 encoded") 447 } 448 charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2]) 449 if charset != "us-ascii" && charset != "iso-8859-1" && charset != "utf-8" { 450 return "", fmt.Errorf("charset not supported: %q", charset) 451 } 452 453 in := bytes.NewBufferString(fields[3]) 454 var r io.Reader 455 switch enc { 456 case "b": 457 r = base64.NewDecoder(base64.StdEncoding, in) 458 case "q": 459 r = qDecoder{r: in} 460 default: 461 return "", fmt.Errorf("RFC 2047 encoding not supported: %q", enc) 462 } 463 464 dec, err := ioutil.ReadAll(r) 465 if err != nil { 466 return "", err 467 } 468 469 switch charset { 470 case "us-ascii": 471 b := new(bytes.Buffer) 472 for _, c := range dec { 473 if c >= 0x80 { 474 b.WriteRune(unicode.ReplacementChar) 475 } else { 476 b.WriteRune(rune(c)) 477 } 478 } 479 return b.String(), nil 480 case "iso-8859-1": 481 b := new(bytes.Buffer) 482 for _, c := range dec { 483 b.WriteRune(rune(c)) 484 } 485 return b.String(), nil 486 case "utf-8": 487 return string(dec), nil 488 } 489 panic("unreachable") 490 } 491 492 type qDecoder struct { 493 r io.Reader 494 scratch [2]byte 495 } 496 497 func (qd qDecoder) Read(p []byte) (n int, err error) { 498 // This method writes at most one byte into p. 499 if len(p) == 0 { 500 return 0, nil 501 } 502 if _, err := qd.r.Read(qd.scratch[:1]); err != nil { 503 return 0, err 504 } 505 switch c := qd.scratch[0]; { 506 case c == '=': 507 if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil { 508 return 0, err 509 } 510 x, err := strconv.ParseInt(string(qd.scratch[:2]), 16, 64) 511 if err != nil { 512 return 0, fmt.Errorf("mail: invalid RFC 2047 encoding: %q", qd.scratch[:2]) 513 } 514 p[0] = byte(x) 515 case c == '_': 516 p[0] = ' ' 517 default: 518 p[0] = c 519 } 520 return 1, nil 521 } 522 523 var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 524 "abcdefghijklmnopqrstuvwxyz" + 525 "0123456789" + 526 "!#$%&'*+-/=?^_`{|}~") 527 528 // isAtext returns true if c is an RFC 5322 atext character. 529 // If dot is true, period is included. 530 func isAtext(c byte, dot bool) bool { 531 if dot && c == '.' { 532 return true 533 } 534 return bytes.IndexByte(atextChars, c) >= 0 535 } 536 537 // isQtext returns true if c is an RFC 5322 qtext character. 538 func isQtext(c byte) bool { 539 // Printable US-ASCII, excluding backslash or quote. 540 if c == '\\' || c == '"' { 541 return false 542 } 543 return '!' <= c && c <= '~' 544 } 545 546 // isVchar returns true if c is an RFC 5322 VCHAR character. 547 func isVchar(c byte) bool { 548 // Visible (printing) characters. 549 return '!' <= c && c <= '~' 550 } 551 552 // isWSP returns true if c is a WSP (white space). 553 // WSP is a space or horizontal tab (RFC5234 Appendix B). 554 func isWSP(c byte) bool { 555 return c == ' ' || c == '\t' 556 }