github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/net/mail/message.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package mail implements parsing of mail messages. 7 8 For the most part, this package follows the syntax as specified by RFC 5322. 9 Notable divergences: 10 * Obsolete address formats are not parsed, including addresses with 11 embedded route information. 12 * Group addresses are not parsed. 13 * The full range of spacing (the CFWS syntax element) is not supported, 14 such as breaking addresses across lines. 15 */ 16 package mail 17 18 import ( 19 "bufio" 20 "bytes" 21 "encoding/base64" 22 "errors" 23 "fmt" 24 "io" 25 "io/ioutil" 26 "log" 27 "net/textproto" 28 "strconv" 29 "strings" 30 "time" 31 ) 32 33 var debug = debugT(false) 34 35 type debugT bool 36 37 func (d debugT) Printf(format string, args ...interface{}) { 38 if d { 39 log.Printf(format, args...) 40 } 41 } 42 43 // A Message represents a parsed mail message. 44 type Message struct { 45 Header Header 46 Body io.Reader 47 } 48 49 // ReadMessage reads a message from r. 50 // The headers are parsed, and the body of the message will be available 51 // for reading from r. 52 func ReadMessage(r io.Reader) (msg *Message, err error) { 53 tp := textproto.NewReader(bufio.NewReader(r)) 54 55 hdr, err := tp.ReadMIMEHeader() 56 if err != nil { 57 return nil, err 58 } 59 60 return &Message{ 61 Header: Header(hdr), 62 Body: tp.R, 63 }, nil 64 } 65 66 // Layouts suitable for passing to time.Parse. 67 // These are tried in order. 68 var dateLayouts []string 69 70 func init() { 71 // Generate layouts based on RFC 5322, section 3.3. 72 73 dows := [...]string{"", "Mon, "} // day-of-week 74 days := [...]string{"2", "02"} // day = 1*2DIGIT 75 years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT 76 seconds := [...]string{":05", ""} // second 77 // "-0700 (MST)" is not in RFC 5322, but is common. 78 zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ... 79 80 for _, dow := range dows { 81 for _, day := range days { 82 for _, year := range years { 83 for _, second := range seconds { 84 for _, zone := range zones { 85 s := dow + day + " Jan " + year + " 15:04" + second + " " + zone 86 dateLayouts = append(dateLayouts, s) 87 } 88 } 89 } 90 } 91 } 92 } 93 94 func parseDate(date string) (time.Time, error) { 95 for _, layout := range dateLayouts { 96 t, err := time.Parse(layout, date) 97 if err == nil { 98 return t, nil 99 } 100 } 101 return time.Time{}, errors.New("mail: header could not be parsed") 102 } 103 104 // A Header represents the key-value pairs in a mail message header. 105 type Header map[string][]string 106 107 // Get gets the first value associated with the given key. 108 // If there are no values associated with the key, Get returns "". 109 func (h Header) Get(key string) string { 110 return textproto.MIMEHeader(h).Get(key) 111 } 112 113 var ErrHeaderNotPresent = errors.New("mail: header not in message") 114 115 // Date parses the Date header field. 116 func (h Header) Date() (time.Time, error) { 117 hdr := h.Get("Date") 118 if hdr == "" { 119 return time.Time{}, ErrHeaderNotPresent 120 } 121 return parseDate(hdr) 122 } 123 124 // AddressList parses the named header field as a list of addresses. 125 func (h Header) AddressList(key string) ([]*Address, error) { 126 hdr := h.Get(key) 127 if hdr == "" { 128 return nil, ErrHeaderNotPresent 129 } 130 return ParseAddressList(hdr) 131 } 132 133 // Address represents a single mail address. 134 // An address such as "Barry Gibbs <bg@example.com>" is represented 135 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. 136 type Address struct { 137 Name string // Proper name; may be empty. 138 Address string // user@domain 139 } 140 141 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" 142 func ParseAddress(address string) (*Address, error) { 143 return newAddrParser(address).parseAddress() 144 } 145 146 // ParseAddressList parses the given string as a list of addresses. 147 func ParseAddressList(list string) ([]*Address, error) { 148 return newAddrParser(list).parseAddressList() 149 } 150 151 // String formats the address as a valid RFC 5322 address. 152 // If the address's name contains non-ASCII characters 153 // the name will be rendered according to RFC 2047. 154 func (a *Address) String() string { 155 s := "<" + a.Address + ">" 156 if a.Name == "" { 157 return s 158 } 159 // If every character is printable ASCII, quoting is simple. 160 allPrintable := true 161 for i := 0; i < len(a.Name); i++ { 162 if !isVchar(a.Name[i]) { 163 allPrintable = false 164 break 165 } 166 } 167 if allPrintable { 168 b := bytes.NewBufferString(`"`) 169 for i := 0; i < len(a.Name); i++ { 170 if !isQtext(a.Name[i]) { 171 b.WriteByte('\\') 172 } 173 b.WriteByte(a.Name[i]) 174 } 175 b.WriteString(`" `) 176 b.WriteString(s) 177 return b.String() 178 } 179 180 // UTF-8 "Q" encoding 181 b := bytes.NewBufferString("=?utf-8?q?") 182 for i := 0; i < len(a.Name); i++ { 183 switch c := a.Name[i]; { 184 case c == ' ': 185 b.WriteByte('_') 186 case isVchar(c) && c != '=' && c != '?' && c != '_': 187 b.WriteByte(c) 188 default: 189 fmt.Fprintf(b, "=%02X", c) 190 } 191 } 192 b.WriteString("?= ") 193 b.WriteString(s) 194 return b.String() 195 } 196 197 type addrParser []byte 198 199 func newAddrParser(s string) *addrParser { 200 p := addrParser(s) 201 return &p 202 } 203 204 func (p *addrParser) parseAddressList() ([]*Address, error) { 205 var list []*Address 206 for { 207 p.skipSpace() 208 addr, err := p.parseAddress() 209 if err != nil { 210 return nil, err 211 } 212 list = append(list, addr) 213 214 p.skipSpace() 215 if p.empty() { 216 break 217 } 218 if !p.consume(',') { 219 return nil, errors.New("mail: expected comma") 220 } 221 } 222 return list, nil 223 } 224 225 // parseAddress parses a single RFC 5322 address at the start of p. 226 func (p *addrParser) parseAddress() (addr *Address, err error) { 227 debug.Printf("parseAddress: %q", *p) 228 p.skipSpace() 229 if p.empty() { 230 return nil, errors.New("mail: no address") 231 } 232 233 // address = name-addr / addr-spec 234 // TODO(dsymonds): Support parsing group address. 235 236 // addr-spec has a more restricted grammar than name-addr, 237 // so try parsing it first, and fallback to name-addr. 238 // TODO(dsymonds): Is this really correct? 239 spec, err := p.consumeAddrSpec() 240 if err == nil { 241 return &Address{ 242 Address: spec, 243 }, err 244 } 245 debug.Printf("parseAddress: not an addr-spec: %v", err) 246 debug.Printf("parseAddress: state is now %q", *p) 247 248 // display-name 249 var displayName string 250 if p.peek() != '<' { 251 displayName, err = p.consumePhrase() 252 if err != nil { 253 return nil, err 254 } 255 } 256 debug.Printf("parseAddress: displayName=%q", displayName) 257 258 // angle-addr = "<" addr-spec ">" 259 p.skipSpace() 260 if !p.consume('<') { 261 return nil, errors.New("mail: no angle-addr") 262 } 263 spec, err = p.consumeAddrSpec() 264 if err != nil { 265 return nil, err 266 } 267 if !p.consume('>') { 268 return nil, errors.New("mail: unclosed angle-addr") 269 } 270 debug.Printf("parseAddress: spec=%q", spec) 271 272 return &Address{ 273 Name: displayName, 274 Address: spec, 275 }, nil 276 } 277 278 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 279 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 280 debug.Printf("consumeAddrSpec: %q", *p) 281 282 orig := *p 283 defer func() { 284 if err != nil { 285 *p = orig 286 } 287 }() 288 289 // local-part = dot-atom / quoted-string 290 var localPart string 291 p.skipSpace() 292 if p.empty() { 293 return "", errors.New("mail: no addr-spec") 294 } 295 if p.peek() == '"' { 296 // quoted-string 297 debug.Printf("consumeAddrSpec: parsing quoted-string") 298 localPart, err = p.consumeQuotedString() 299 } else { 300 // dot-atom 301 debug.Printf("consumeAddrSpec: parsing dot-atom") 302 localPart, err = p.consumeAtom(true) 303 } 304 if err != nil { 305 debug.Printf("consumeAddrSpec: failed: %v", err) 306 return "", err 307 } 308 309 if !p.consume('@') { 310 return "", errors.New("mail: missing @ in addr-spec") 311 } 312 313 // domain = dot-atom / domain-literal 314 var domain string 315 p.skipSpace() 316 if p.empty() { 317 return "", errors.New("mail: no domain in addr-spec") 318 } 319 // TODO(dsymonds): Handle domain-literal 320 domain, err = p.consumeAtom(true) 321 if err != nil { 322 return "", err 323 } 324 325 return localPart + "@" + domain, nil 326 } 327 328 // consumePhrase parses the RFC 5322 phrase at the start of p. 329 func (p *addrParser) consumePhrase() (phrase string, err error) { 330 debug.Printf("consumePhrase: [%s]", *p) 331 // phrase = 1*word 332 var words []string 333 for { 334 // word = atom / quoted-string 335 var word string 336 p.skipSpace() 337 if p.empty() { 338 return "", errors.New("mail: missing phrase") 339 } 340 if p.peek() == '"' { 341 // quoted-string 342 word, err = p.consumeQuotedString() 343 } else { 344 // atom 345 // We actually parse dot-atom here to be more permissive 346 // than what RFC 5322 specifies. 347 word, err = p.consumeAtom(true) 348 } 349 350 // RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s. 351 if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 { 352 word, err = decodeRFC2047Word(word) 353 } 354 355 if err != nil { 356 break 357 } 358 debug.Printf("consumePhrase: consumed %q", word) 359 words = append(words, word) 360 } 361 // Ignore any error if we got at least one word. 362 if err != nil && len(words) == 0 { 363 debug.Printf("consumePhrase: hit err: %v", err) 364 return "", errors.New("mail: missing word in phrase") 365 } 366 phrase = strings.Join(words, " ") 367 return phrase, nil 368 } 369 370 // consumeQuotedString parses the quoted string at the start of p. 371 func (p *addrParser) consumeQuotedString() (qs string, err error) { 372 // Assume first byte is '"'. 373 i := 1 374 qsb := make([]byte, 0, 10) 375 Loop: 376 for { 377 if i >= p.len() { 378 return "", errors.New("mail: unclosed quoted-string") 379 } 380 switch c := (*p)[i]; { 381 case c == '"': 382 break Loop 383 case c == '\\': 384 if i+1 == p.len() { 385 return "", errors.New("mail: unclosed quoted-string") 386 } 387 qsb = append(qsb, (*p)[i+1]) 388 i += 2 389 case isQtext(c), c == ' ' || c == '\t': 390 // qtext (printable US-ASCII excluding " and \), or 391 // FWS (almost; we're ignoring CRLF) 392 qsb = append(qsb, c) 393 i++ 394 default: 395 return "", fmt.Errorf("mail: bad character in quoted-string: %q", c) 396 } 397 } 398 *p = (*p)[i+1:] 399 return string(qsb), nil 400 } 401 402 // consumeAtom parses an RFC 5322 atom at the start of p. 403 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 404 func (p *addrParser) consumeAtom(dot bool) (atom string, err error) { 405 if !isAtext(p.peek(), false) { 406 return "", errors.New("mail: invalid string") 407 } 408 i := 1 409 for ; i < p.len() && isAtext((*p)[i], dot); i++ { 410 } 411 atom, *p = string((*p)[:i]), (*p)[i:] 412 return atom, nil 413 } 414 415 func (p *addrParser) consume(c byte) bool { 416 if p.empty() || p.peek() != c { 417 return false 418 } 419 *p = (*p)[1:] 420 return true 421 } 422 423 // skipSpace skips the leading space and tab characters. 424 func (p *addrParser) skipSpace() { 425 *p = bytes.TrimLeft(*p, " \t") 426 } 427 428 func (p *addrParser) peek() byte { 429 return (*p)[0] 430 } 431 432 func (p *addrParser) empty() bool { 433 return p.len() == 0 434 } 435 436 func (p *addrParser) len() int { 437 return len(*p) 438 } 439 440 func decodeRFC2047Word(s string) (string, error) { 441 fields := strings.Split(s, "?") 442 if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" { 443 return "", errors.New("mail: address not RFC 2047 encoded") 444 } 445 charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2]) 446 if charset != "iso-8859-1" && charset != "utf-8" { 447 return "", fmt.Errorf("mail: charset not supported: %q", charset) 448 } 449 450 in := bytes.NewBufferString(fields[3]) 451 var r io.Reader 452 switch enc { 453 case "b": 454 r = base64.NewDecoder(base64.StdEncoding, in) 455 case "q": 456 r = qDecoder{r: in} 457 default: 458 return "", fmt.Errorf("mail: RFC 2047 encoding not supported: %q", enc) 459 } 460 461 dec, err := ioutil.ReadAll(r) 462 if err != nil { 463 return "", err 464 } 465 466 switch charset { 467 case "iso-8859-1": 468 b := new(bytes.Buffer) 469 for _, c := range dec { 470 b.WriteRune(rune(c)) 471 } 472 return b.String(), nil 473 case "utf-8": 474 return string(dec), nil 475 } 476 panic("unreachable") 477 } 478 479 type qDecoder struct { 480 r io.Reader 481 scratch [2]byte 482 } 483 484 func (qd qDecoder) Read(p []byte) (n int, err error) { 485 // This method writes at most one byte into p. 486 if len(p) == 0 { 487 return 0, nil 488 } 489 if _, err := qd.r.Read(qd.scratch[:1]); err != nil { 490 return 0, err 491 } 492 switch c := qd.scratch[0]; { 493 case c == '=': 494 if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil { 495 return 0, err 496 } 497 x, err := strconv.ParseInt(string(qd.scratch[:2]), 16, 64) 498 if err != nil { 499 return 0, fmt.Errorf("mail: invalid RFC 2047 encoding: %q", qd.scratch[:2]) 500 } 501 p[0] = byte(x) 502 case c == '_': 503 p[0] = ' ' 504 default: 505 p[0] = c 506 } 507 return 1, nil 508 } 509 510 var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 511 "abcdefghijklmnopqrstuvwxyz" + 512 "0123456789" + 513 "!#$%&'*+-/=?^_`{|}~") 514 515 // isAtext returns true if c is an RFC 5322 atext character. 516 // If dot is true, period is included. 517 func isAtext(c byte, dot bool) bool { 518 if dot && c == '.' { 519 return true 520 } 521 return bytes.IndexByte(atextChars, c) >= 0 522 } 523 524 // isQtext returns true if c is an RFC 5322 qtext character. 525 func isQtext(c byte) bool { 526 // Printable US-ASCII, excluding backslash or quote. 527 if c == '\\' || c == '"' { 528 return false 529 } 530 return '!' <= c && c <= '~' 531 } 532 533 // isVchar returns true if c is an RFC 5322 VCHAR character. 534 func isVchar(c byte) bool { 535 // Visible (printing) characters. 536 return '!' <= c && c <= '~' 537 }