github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322. 9 Notable divergences: 10 * Obsolete address formats are not parsed, including addresses with 11 embedded route information. 12 * Group addresses are not parsed. 13 * The full range of spacing (the CFWS syntax element) is not supported, 14 such as breaking addresses across lines. 15 */ 16 package mail 17 18 import ( 19 "bufio" 20 "bytes" 21 "errors" 22 "fmt" 23 "internal/mime" 24 "io" 25 "log" 26 "net/textproto" 27 "strings" 28 "time" 29 ) 30 31 var debug = debugT(false) 32 33 type debugT bool 34 35 func (d debugT) Printf(format string, args ...interface{}) { 36 if d { 37 log.Printf(format, args...) 38 } 39 } 40 41 // A Message represents a parsed mail message. 42 type Message struct { 43 Header Header 44 Body io.Reader 45 } 46 47 // ReadMessage reads a message from r. 48 // The headers are parsed, and the body of the message will be available 49 // for reading from r. 50 func ReadMessage(r io.Reader) (msg *Message, err error) { 51 tp := textproto.NewReader(bufio.NewReader(r)) 52 53 hdr, err := tp.ReadMIMEHeader() 54 if err != nil { 55 return nil, err 56 } 57 58 return &Message{ 59 Header: Header(hdr), 60 Body: tp.R, 61 }, nil 62 } 63 64 // Layouts suitable for passing to time.Parse. 65 // These are tried in order. 66 var dateLayouts []string 67 68 func init() { 69 // Generate layouts based on RFC 5322, section 3.3. 70 71 dows := [...]string{"", "Mon, "} // day-of-week 72 days := [...]string{"2", "02"} // day = 1*2DIGIT 73 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 74 seconds := [...]string{":05", ""} // second 75 // "-0700 (MST)" is not in RFC 5322, but is common. 76 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 77 78 for _, dow := range dows { 79 for _, day := range days { 80 for _, year := range years { 81 for _, second := range seconds { 82 for _, zone := range zones { 83 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 84 dateLayouts = append(dateLayouts, s) 85 } 86 } 87 } 88 } 89 } 90 } 91 92 func parseDate(date string) (time.Time, error) { 93 for _, layout := range dateLayouts { 94 t, err := time.Parse(layout, date) 95 if err == nil { 96 return t, nil 97 } 98 } 99 return time.Time{}, errors.New("mail: header could not be parsed") 100 } 101 102 // A Header represents the key-value pairs in a mail message header. 103 type Header map[string][]string 104 105 // Get gets the first value associated with the given key. 106 // If there are no values associated with the key, Get returns "". 107 func (h Header) Get(key string) string { 108 return textproto.MIMEHeader(h).Get(key) 109 } 110 111 var ErrHeaderNotPresent = errors.New("mail: header not in message") 112 113 // Date parses the Date header field. 114 func (h Header) Date() (time.Time, error) { 115 hdr := h.Get("Date") 116 if hdr == "" { 117 return time.Time{}, ErrHeaderNotPresent 118 } 119 return parseDate(hdr) 120 } 121 122 // AddressList parses the named header field as a list of addresses. 123 func (h Header) AddressList(key string) ([]*Address, error) { 124 hdr := h.Get(key) 125 if hdr == "" { 126 return nil, ErrHeaderNotPresent 127 } 128 return ParseAddressList(hdr) 129 } 130 131 // Address represents a single mail address. 132 // An address such as "Barry Gibbs <bg@example.com>" is represented 133 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 134 type Address struct { 135 Name string // Proper name; may be empty. 136 Address string // user@domain 137 } 138 139 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 140 func ParseAddress(address string) (*Address, error) { 141 return newAddrParser(address).parseAddress() 142 } 143 144 // ParseAddressList parses the given string as a list of addresses. 145 func ParseAddressList(list string) ([]*Address, error) { 146 return newAddrParser(list).parseAddressList() 147 } 148 149 // String formats the address as a valid RFC 5322 address. 150 // If the address's name contains non-ASCII characters 151 // the name will be rendered according to RFC 2047. 152 func (a *Address) String() string { 153 s := "<" + a.Address + ">" 154 if a.Name == "" { 155 return s 156 } 157 // If every character is printable ASCII, quoting is simple. 158 allPrintable := true 159 for i := 0; i < len(a.Name); i++ { 160 // isWSP here should actually be isFWS, 161 // but we don't support folding yet. 162 if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) { 163 allPrintable = false 164 break 165 } 166 } 167 if allPrintable { 168 b := bytes.NewBufferString(`"`) 169 for i := 0; i < len(a.Name); i++ { 170 if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) { 171 b.WriteByte('\\') 172 } 173 b.WriteByte(a.Name[i]) 174 } 175 b.WriteString(`" `) 176 b.WriteString(s) 177 return b.String() 178 } 179 180 return mime.EncodeWord(a.Name) + " " + s 181 } 182 183 type addrParser []byte 184 185 func newAddrParser(s string) *addrParser { 186 p := addrParser(s) 187 return &p 188 } 189 190 func (p *addrParser) parseAddressList() ([]*Address, error) { 191 var list []*Address 192 for { 193 p.skipSpace() 194 addr, err := p.parseAddress() 195 if err != nil { 196 return nil, err 197 } 198 list = append(list, addr) 199 200 p.skipSpace() 201 if p.empty() { 202 break 203 } 204 if !p.consume(',') { 205 return nil, errors.New("mail: expected comma") 206 } 207 } 208 return list, nil 209 } 210 211 // parseAddress parses a single RFC 5322 address at the start of p. 212 func (p *addrParser) parseAddress() (addr *Address, err error) { 213 debug.Printf("parseAddress: %q", *p) 214 p.skipSpace() 215 if p.empty() { 216 return nil, errors.New("mail: no address") 217 } 218 219 // address = name-addr / addr-spec 220 // TODO(dsymonds): Support parsing group address. 221 222 // addr-spec has a more restricted grammar than name-addr, 223 // so try parsing it first, and fallback to name-addr. 224 // TODO(dsymonds): Is this really correct? 225 spec, err := p.consumeAddrSpec() 226 if err == nil { 227 return &Address{ 228 Address: spec, 229 }, err 230 } 231 debug.Printf("parseAddress: not an addr-spec: %v", err) 232 debug.Printf("parseAddress: state is now %q", *p) 233 234 // display-name 235 var displayName string 236 if p.peek() != '<' { 237 displayName, err = p.consumePhrase() 238 if err != nil { 239 return nil, err 240 } 241 } 242 debug.Printf("parseAddress: displayName=%q", displayName) 243 244 // angle-addr = "<" addr-spec ">" 245 p.skipSpace() 246 if !p.consume('<') { 247 return nil, errors.New("mail: no angle-addr") 248 } 249 spec, err = p.consumeAddrSpec() 250 if err != nil { 251 return nil, err 252 } 253 if !p.consume('>') { 254 return nil, errors.New("mail: unclosed angle-addr") 255 } 256 debug.Printf("parseAddress: spec=%q", spec) 257 258 return &Address{ 259 Name: displayName, 260 Address: spec, 261 }, nil 262 } 263 264 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 265 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 266 debug.Printf("consumeAddrSpec: %q", *p) 267 268 orig := *p 269 defer func() { 270 if err != nil { 271 *p = orig 272 } 273 }() 274 275 // local-part = dot-atom / quoted-string 276 var localPart string 277 p.skipSpace() 278 if p.empty() { 279 return "", errors.New("mail: no addr-spec") 280 } 281 if p.peek() == '"' { 282 // quoted-string 283 debug.Printf("consumeAddrSpec: parsing quoted-string") 284 localPart, err = p.consumeQuotedString() 285 } else { 286 // dot-atom 287 debug.Printf("consumeAddrSpec: parsing dot-atom") 288 localPart, err = p.consumeAtom(true) 289 } 290 if err != nil { 291 debug.Printf("consumeAddrSpec: failed: %v", err) 292 return "", err 293 } 294 295 if !p.consume('@') { 296 return "", errors.New("mail: missing @ in addr-spec") 297 } 298 299 // domain = dot-atom / domain-literal 300 var domain string 301 p.skipSpace() 302 if p.empty() { 303 return "", errors.New("mail: no domain in addr-spec") 304 } 305 // TODO(dsymonds): Handle domain-literal 306 domain, err = p.consumeAtom(true) 307 if err != nil { 308 return "", err 309 } 310 311 return localPart + "@" + domain, nil 312 } 313 314 // consumePhrase parses the RFC 5322 phrase at the start of p. 315 func (p *addrParser) consumePhrase() (phrase string, err error) { 316 debug.Printf("consumePhrase: [%s]", *p) 317 // phrase = 1*word 318 var words []string 319 for { 320 // word = atom / quoted-string 321 var word string 322 p.skipSpace() 323 if p.empty() { 324 return "", errors.New("mail: missing phrase") 325 } 326 if p.peek() == '"' { 327 // quoted-string 328 word, err = p.consumeQuotedString() 329 } else { 330 // atom 331 // We actually parse dot-atom here to be more permissive 332 // than what RFC 5322 specifies. 333 word, err = p.consumeAtom(true) 334 } 335 336 // RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s. 337 if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 { 338 word, err = mime.DecodeWord(word) 339 } 340 341 if err != nil { 342 break 343 } 344 debug.Printf("consumePhrase: consumed %q", word) 345 words = append(words, word) 346 } 347 // Ignore any error if we got at least one word. 348 if err != nil && len(words) == 0 { 349 debug.Printf("consumePhrase: hit err: %v", err) 350 return "", fmt.Errorf("mail: missing word in phrase: %v", err) 351 } 352 phrase = strings.Join(words, " ") 353 return phrase, nil 354 } 355 356 // consumeQuotedString parses the quoted string at the start of p. 357 func (p *addrParser) consumeQuotedString() (qs string, err error) { 358 // Assume first byte is '"'. 359 i := 1 360 qsb := make([]byte, 0, 10) 361 Loop: 362 for { 363 if i >= p.len() { 364 return "", errors.New("mail: unclosed quoted-string") 365 } 366 switch c := (*p)[i]; { 367 case c == '"': 368 break Loop 369 case c == '\\': 370 if i+1 == p.len() { 371 return "", errors.New("mail: unclosed quoted-string") 372 } 373 qsb = append(qsb, (*p)[i+1]) 374 i += 2 375 case isQtext(c), c == ' ' || c == '\t': 376 // qtext (printable US-ASCII excluding " and \), or 377 // FWS (almost; we're ignoring CRLF) 378 qsb = append(qsb, c) 379 i++ 380 default: 381 return "", fmt.Errorf("mail: bad character in quoted-string: %q", c) 382 } 383 } 384 *p = (*p)[i+1:] 385 return string(qsb), nil 386 } 387 388 // consumeAtom parses an RFC 5322 atom at the start of p. 389 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 390 func (p *addrParser) consumeAtom(dot bool) (atom string, err error) { 391 if !isAtext(p.peek(), false) { 392 return "", errors.New("mail: invalid string") 393 } 394 i := 1 395 for ; i < p.len() && isAtext((*p)[i], dot); i++ { 396 } 397 atom, *p = string((*p)[:i]), (*p)[i:] 398 return atom, nil 399 } 400 401 func (p *addrParser) consume(c byte) bool { 402 if p.empty() || p.peek() != c { 403 return false 404 } 405 *p = (*p)[1:] 406 return true 407 } 408 409 // skipSpace skips the leading space and tab characters. 410 func (p *addrParser) skipSpace() { 411 *p = bytes.TrimLeft(*p, " \t") 412 } 413 414 func (p *addrParser) peek() byte { 415 return (*p)[0] 416 } 417 418 func (p *addrParser) empty() bool { 419 return p.len() == 0 420 } 421 422 func (p *addrParser) len() int { 423 return len(*p) 424 } 425 426 var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 427 "abcdefghijklmnopqrstuvwxyz" + 428 "0123456789" + 429 "!#$%&'*+-/=?^_`{|}~") 430 431 // isAtext reports whether c is an RFC 5322 atext character. 432 // If dot is true, period is included. 433 func isAtext(c byte, dot bool) bool { 434 if dot && c == '.' { 435 return true 436 } 437 return bytes.IndexByte(atextChars, c) >= 0 438 } 439 440 // isQtext reports whether c is an RFC 5322 qtext character. 441 func isQtext(c byte) bool { 442 // Printable US-ASCII, excluding backslash or quote. 443 if c == '\\' || c == '"' { 444 return false 445 } 446 return '!' <= c && c <= '~' 447 } 448 449 // isVchar reports whether c is an RFC 5322 VCHAR character. 450 func isVchar(c byte) bool { 451 // Visible (printing) characters. 452 return '!' <= c && c <= '~' 453 } 454 455 // isWSP reports whether c is a WSP (white space). 456 // WSP is a space or horizontal tab (RFC5234 Appendix B). 457 func isWSP(c byte) bool { 458 return c == ' ' || c == '\t' 459 }