github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/net/mail/message.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322.
     9  Notable divergences:
    10  	* Obsolete address formats are not parsed, including addresses with
    11  	  embedded route information.
    12  	* Group addresses are not parsed.
    13  	* The full range of spacing (the CFWS syntax element) is not supported,
    14  	  such as breaking addresses across lines.
    15  */
    16  package mail
    17  
    18  import (
    19  	"bufio"
    20  	"bytes"
    21  	"encoding/base64"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"io/ioutil"
    26  	"log"
    27  	"net/textproto"
    28  	"strconv"
    29  	"strings"
    30  	"time"
    31  )
    32  
    33  var debug = debugT(false)
    34  
    35  type debugT bool
    36  
    37  func (d debugT) Printf(format string, args ...interface{}) {
    38  	if d {
    39  		log.Printf(format, args...)
    40  	}
    41  }
    42  
    43  // A Message represents a parsed mail message.
    44  type Message struct {
    45  	Header Header
    46  	Body   io.Reader
    47  }
    48  
    49  // ReadMessage reads a message from r.
    50  // The headers are parsed, and the body of the message will be available
    51  // for reading from r.
    52  func ReadMessage(r io.Reader) (msg *Message, err error) {
    53  	tp := textproto.NewReader(bufio.NewReader(r))
    54  
    55  	hdr, err := tp.ReadMIMEHeader()
    56  	if err != nil {
    57  		return nil, err
    58  	}
    59  
    60  	return &Message{
    61  		Header: Header(hdr),
    62  		Body:   tp.R,
    63  	}, nil
    64  }
    65  
    66  // Layouts suitable for passing to time.Parse.
    67  // These are tried in order.
    68  var dateLayouts []string
    69  
    70  func init() {
    71  	// Generate layouts based on RFC 5322, section 3.3.
    72  
    73  	dows := [...]string{"", "Mon, "}   // day-of-week
    74  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
    75  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
    76  	seconds := [...]string{":05", ""}  // second
    77  	// "-0700 (MST)" is not in RFC 5322, but is common.
    78  	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
    79  
    80  	for _, dow := range dows {
    81  		for _, day := range days {
    82  			for _, year := range years {
    83  				for _, second := range seconds {
    84  					for _, zone := range zones {
    85  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    86  						dateLayouts = append(dateLayouts, s)
    87  					}
    88  				}
    89  			}
    90  		}
    91  	}
    92  }
    93  
    94  func parseDate(date string) (time.Time, error) {
    95  	for _, layout := range dateLayouts {
    96  		t, err := time.Parse(layout, date)
    97  		if err == nil {
    98  			return t, nil
    99  		}
   100  	}
   101  	return time.Time{}, errors.New("mail: header could not be parsed")
   102  }
   103  
   104  // A Header represents the key-value pairs in a mail message header.
   105  type Header map[string][]string
   106  
   107  // Get gets the first value associated with the given key.
   108  // If there are no values associated with the key, Get returns "".
   109  func (h Header) Get(key string) string {
   110  	return textproto.MIMEHeader(h).Get(key)
   111  }
   112  
   113  var ErrHeaderNotPresent = errors.New("mail: header not in message")
   114  
   115  // Date parses the Date header field.
   116  func (h Header) Date() (time.Time, error) {
   117  	hdr := h.Get("Date")
   118  	if hdr == "" {
   119  		return time.Time{}, ErrHeaderNotPresent
   120  	}
   121  	return parseDate(hdr)
   122  }
   123  
   124  // AddressList parses the named header field as a list of addresses.
   125  func (h Header) AddressList(key string) ([]*Address, error) {
   126  	hdr := h.Get(key)
   127  	if hdr == "" {
   128  		return nil, ErrHeaderNotPresent
   129  	}
   130  	return ParseAddressList(hdr)
   131  }
   132  
   133  // Address represents a single mail address.
   134  // An address such as "Barry Gibbs <bg@example.com>" is represented
   135  // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
   136  type Address struct {
   137  	Name    string // Proper name; may be empty.
   138  	Address string // user@domain
   139  }
   140  
   141  // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   142  func ParseAddress(address string) (*Address, error) {
   143  	return newAddrParser(address).parseAddress()
   144  }
   145  
   146  // ParseAddressList parses the given string as a list of addresses.
   147  func ParseAddressList(list string) ([]*Address, error) {
   148  	return newAddrParser(list).parseAddressList()
   149  }
   150  
   151  // String formats the address as a valid RFC 5322 address.
   152  // If the address's name contains non-ASCII characters
   153  // the name will be rendered according to RFC 2047.
   154  func (a *Address) String() string {
   155  	s := "<" + a.Address + ">"
   156  	if a.Name == "" {
   157  		return s
   158  	}
   159  	// If every character is printable ASCII, quoting is simple.
   160  	allPrintable := true
   161  	for i := 0; i < len(a.Name); i++ {
   162  		if !isVchar(a.Name[i]) {
   163  			allPrintable = false
   164  			break
   165  		}
   166  	}
   167  	if allPrintable {
   168  		b := bytes.NewBufferString(`"`)
   169  		for i := 0; i < len(a.Name); i++ {
   170  			if !isQtext(a.Name[i]) {
   171  				b.WriteByte('\\')
   172  			}
   173  			b.WriteByte(a.Name[i])
   174  		}
   175  		b.WriteString(`" `)
   176  		b.WriteString(s)
   177  		return b.String()
   178  	}
   179  
   180  	// UTF-8 "Q" encoding
   181  	b := bytes.NewBufferString("=?utf-8?q?")
   182  	for i := 0; i < len(a.Name); i++ {
   183  		switch c := a.Name[i]; {
   184  		case c == ' ':
   185  			b.WriteByte('_')
   186  		case isVchar(c) && c != '=' && c != '?' && c != '_':
   187  			b.WriteByte(c)
   188  		default:
   189  			fmt.Fprintf(b, "=%02X", c)
   190  		}
   191  	}
   192  	b.WriteString("?= ")
   193  	b.WriteString(s)
   194  	return b.String()
   195  }
   196  
   197  type addrParser []byte
   198  
   199  func newAddrParser(s string) *addrParser {
   200  	p := addrParser(s)
   201  	return &p
   202  }
   203  
   204  func (p *addrParser) parseAddressList() ([]*Address, error) {
   205  	var list []*Address
   206  	for {
   207  		p.skipSpace()
   208  		addr, err := p.parseAddress()
   209  		if err != nil {
   210  			return nil, err
   211  		}
   212  		list = append(list, addr)
   213  
   214  		p.skipSpace()
   215  		if p.empty() {
   216  			break
   217  		}
   218  		if !p.consume(',') {
   219  			return nil, errors.New("mail: expected comma")
   220  		}
   221  	}
   222  	return list, nil
   223  }
   224  
   225  // parseAddress parses a single RFC 5322 address at the start of p.
   226  func (p *addrParser) parseAddress() (addr *Address, err error) {
   227  	debug.Printf("parseAddress: %q", *p)
   228  	p.skipSpace()
   229  	if p.empty() {
   230  		return nil, errors.New("mail: no address")
   231  	}
   232  
   233  	// address = name-addr / addr-spec
   234  	// TODO(dsymonds): Support parsing group address.
   235  
   236  	// addr-spec has a more restricted grammar than name-addr,
   237  	// so try parsing it first, and fallback to name-addr.
   238  	// TODO(dsymonds): Is this really correct?
   239  	spec, err := p.consumeAddrSpec()
   240  	if err == nil {
   241  		return &Address{
   242  			Address: spec,
   243  		}, err
   244  	}
   245  	debug.Printf("parseAddress: not an addr-spec: %v", err)
   246  	debug.Printf("parseAddress: state is now %q", *p)
   247  
   248  	// display-name
   249  	var displayName string
   250  	if p.peek() != '<' {
   251  		displayName, err = p.consumePhrase()
   252  		if err != nil {
   253  			return nil, err
   254  		}
   255  	}
   256  	debug.Printf("parseAddress: displayName=%q", displayName)
   257  
   258  	// angle-addr = "<" addr-spec ">"
   259  	p.skipSpace()
   260  	if !p.consume('<') {
   261  		return nil, errors.New("mail: no angle-addr")
   262  	}
   263  	spec, err = p.consumeAddrSpec()
   264  	if err != nil {
   265  		return nil, err
   266  	}
   267  	if !p.consume('>') {
   268  		return nil, errors.New("mail: unclosed angle-addr")
   269  	}
   270  	debug.Printf("parseAddress: spec=%q", spec)
   271  
   272  	return &Address{
   273  		Name:    displayName,
   274  		Address: spec,
   275  	}, nil
   276  }
   277  
   278  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   279  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   280  	debug.Printf("consumeAddrSpec: %q", *p)
   281  
   282  	orig := *p
   283  	defer func() {
   284  		if err != nil {
   285  			*p = orig
   286  		}
   287  	}()
   288  
   289  	// local-part = dot-atom / quoted-string
   290  	var localPart string
   291  	p.skipSpace()
   292  	if p.empty() {
   293  		return "", errors.New("mail: no addr-spec")
   294  	}
   295  	if p.peek() == '"' {
   296  		// quoted-string
   297  		debug.Printf("consumeAddrSpec: parsing quoted-string")
   298  		localPart, err = p.consumeQuotedString()
   299  	} else {
   300  		// dot-atom
   301  		debug.Printf("consumeAddrSpec: parsing dot-atom")
   302  		localPart, err = p.consumeAtom(true)
   303  	}
   304  	if err != nil {
   305  		debug.Printf("consumeAddrSpec: failed: %v", err)
   306  		return "", err
   307  	}
   308  
   309  	if !p.consume('@') {
   310  		return "", errors.New("mail: missing @ in addr-spec")
   311  	}
   312  
   313  	// domain = dot-atom / domain-literal
   314  	var domain string
   315  	p.skipSpace()
   316  	if p.empty() {
   317  		return "", errors.New("mail: no domain in addr-spec")
   318  	}
   319  	// TODO(dsymonds): Handle domain-literal
   320  	domain, err = p.consumeAtom(true)
   321  	if err != nil {
   322  		return "", err
   323  	}
   324  
   325  	return localPart + "@" + domain, nil
   326  }
   327  
   328  // consumePhrase parses the RFC 5322 phrase at the start of p.
   329  func (p *addrParser) consumePhrase() (phrase string, err error) {
   330  	debug.Printf("consumePhrase: [%s]", *p)
   331  	// phrase = 1*word
   332  	var words []string
   333  	for {
   334  		// word = atom / quoted-string
   335  		var word string
   336  		p.skipSpace()
   337  		if p.empty() {
   338  			return "", errors.New("mail: missing phrase")
   339  		}
   340  		if p.peek() == '"' {
   341  			// quoted-string
   342  			word, err = p.consumeQuotedString()
   343  		} else {
   344  			// atom
   345  			// We actually parse dot-atom here to be more permissive
   346  			// than what RFC 5322 specifies.
   347  			word, err = p.consumeAtom(true)
   348  		}
   349  
   350  		// RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s.
   351  		if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 {
   352  			word, err = decodeRFC2047Word(word)
   353  		}
   354  
   355  		if err != nil {
   356  			break
   357  		}
   358  		debug.Printf("consumePhrase: consumed %q", word)
   359  		words = append(words, word)
   360  	}
   361  	// Ignore any error if we got at least one word.
   362  	if err != nil && len(words) == 0 {
   363  		debug.Printf("consumePhrase: hit err: %v", err)
   364  		return "", errors.New("mail: missing word in phrase")
   365  	}
   366  	phrase = strings.Join(words, " ")
   367  	return phrase, nil
   368  }
   369  
   370  // consumeQuotedString parses the quoted string at the start of p.
   371  func (p *addrParser) consumeQuotedString() (qs string, err error) {
   372  	// Assume first byte is '"'.
   373  	i := 1
   374  	qsb := make([]byte, 0, 10)
   375  Loop:
   376  	for {
   377  		if i >= p.len() {
   378  			return "", errors.New("mail: unclosed quoted-string")
   379  		}
   380  		switch c := (*p)[i]; {
   381  		case c == '"':
   382  			break Loop
   383  		case c == '\\':
   384  			if i+1 == p.len() {
   385  				return "", errors.New("mail: unclosed quoted-string")
   386  			}
   387  			qsb = append(qsb, (*p)[i+1])
   388  			i += 2
   389  		case isQtext(c), c == ' ' || c == '\t':
   390  			// qtext (printable US-ASCII excluding " and \), or
   391  			// FWS (almost; we're ignoring CRLF)
   392  			qsb = append(qsb, c)
   393  			i++
   394  		default:
   395  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
   396  		}
   397  	}
   398  	*p = (*p)[i+1:]
   399  	return string(qsb), nil
   400  }
   401  
   402  // consumeAtom parses an RFC 5322 atom at the start of p.
   403  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   404  func (p *addrParser) consumeAtom(dot bool) (atom string, err error) {
   405  	if !isAtext(p.peek(), false) {
   406  		return "", errors.New("mail: invalid string")
   407  	}
   408  	i := 1
   409  	for ; i < p.len() && isAtext((*p)[i], dot); i++ {
   410  	}
   411  	atom, *p = string((*p)[:i]), (*p)[i:]
   412  	return atom, nil
   413  }
   414  
   415  func (p *addrParser) consume(c byte) bool {
   416  	if p.empty() || p.peek() != c {
   417  		return false
   418  	}
   419  	*p = (*p)[1:]
   420  	return true
   421  }
   422  
   423  // skipSpace skips the leading space and tab characters.
   424  func (p *addrParser) skipSpace() {
   425  	*p = bytes.TrimLeft(*p, " \t")
   426  }
   427  
   428  func (p *addrParser) peek() byte {
   429  	return (*p)[0]
   430  }
   431  
   432  func (p *addrParser) empty() bool {
   433  	return p.len() == 0
   434  }
   435  
   436  func (p *addrParser) len() int {
   437  	return len(*p)
   438  }
   439  
   440  func decodeRFC2047Word(s string) (string, error) {
   441  	fields := strings.Split(s, "?")
   442  	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" {
   443  		return "", errors.New("mail: address not RFC 2047 encoded")
   444  	}
   445  	charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2])
   446  	if charset != "iso-8859-1" && charset != "utf-8" {
   447  		return "", fmt.Errorf("mail: charset not supported: %q", charset)
   448  	}
   449  
   450  	in := bytes.NewBufferString(fields[3])
   451  	var r io.Reader
   452  	switch enc {
   453  	case "b":
   454  		r = base64.NewDecoder(base64.StdEncoding, in)
   455  	case "q":
   456  		r = qDecoder{r: in}
   457  	default:
   458  		return "", fmt.Errorf("mail: RFC 2047 encoding not supported: %q", enc)
   459  	}
   460  
   461  	dec, err := ioutil.ReadAll(r)
   462  	if err != nil {
   463  		return "", err
   464  	}
   465  
   466  	switch charset {
   467  	case "iso-8859-1":
   468  		b := new(bytes.Buffer)
   469  		for _, c := range dec {
   470  			b.WriteRune(rune(c))
   471  		}
   472  		return b.String(), nil
   473  	case "utf-8":
   474  		return string(dec), nil
   475  	}
   476  	panic("unreachable")
   477  }
   478  
   479  type qDecoder struct {
   480  	r       io.Reader
   481  	scratch [2]byte
   482  }
   483  
   484  func (qd qDecoder) Read(p []byte) (n int, err error) {
   485  	// This method writes at most one byte into p.
   486  	if len(p) == 0 {
   487  		return 0, nil
   488  	}
   489  	if _, err := qd.r.Read(qd.scratch[:1]); err != nil {
   490  		return 0, err
   491  	}
   492  	switch c := qd.scratch[0]; {
   493  	case c == '=':
   494  		if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil {
   495  			return 0, err
   496  		}
   497  		x, err := strconv.ParseInt(string(qd.scratch[:2]), 16, 64)
   498  		if err != nil {
   499  			return 0, fmt.Errorf("mail: invalid RFC 2047 encoding: %q", qd.scratch[:2])
   500  		}
   501  		p[0] = byte(x)
   502  	case c == '_':
   503  		p[0] = ' '
   504  	default:
   505  		p[0] = c
   506  	}
   507  	return 1, nil
   508  }
   509  
   510  var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
   511  	"abcdefghijklmnopqrstuvwxyz" +
   512  	"0123456789" +
   513  	"!#$%&'*+-/=?^_`{|}~")
   514  
   515  // isAtext returns true if c is an RFC 5322 atext character.
   516  // If dot is true, period is included.
   517  func isAtext(c byte, dot bool) bool {
   518  	if dot && c == '.' {
   519  		return true
   520  	}
   521  	return bytes.IndexByte(atextChars, c) >= 0
   522  }
   523  
   524  // isQtext returns true if c is an RFC 5322 qtext character.
   525  func isQtext(c byte) bool {
   526  	// Printable US-ASCII, excluding backslash or quote.
   527  	if c == '\\' || c == '"' {
   528  		return false
   529  	}
   530  	return '!' <= c && c <= '~'
   531  }
   532  
   533  // isVchar returns true if c is an RFC 5322 VCHAR character.
   534  func isVchar(c byte) bool {
   535  	// Visible (printing) characters.
   536  	return '!' <= c && c <= '~'
   537  }