github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/net/mail/message.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322.
     9  Notable divergences:
    10  	* Obsolete address formats are not parsed, including addresses with
    11  	  embedded route information.
    12  	* Group addresses are not parsed.
    13  	* The full range of spacing (the CFWS syntax element) is not supported,
    14  	  such as breaking addresses across lines.
    15  */
    16  package mail
    17  
    18  import (
    19  	"bufio"
    20  	"bytes"
    21  	"errors"
    22  	"fmt"
    23  	"internal/mime"
    24  	"io"
    25  	"log"
    26  	"net/textproto"
    27  	"strings"
    28  	"time"
    29  )
    30  
    31  var debug = debugT(false)
    32  
    33  type debugT bool
    34  
    35  func (d debugT) Printf(format string, args ...interface{}) {
    36  	if d {
    37  		log.Printf(format, args...)
    38  	}
    39  }
    40  
// A Message represents a parsed mail message.
// ReadMessage fills in Header with the parsed header fields and Body with
// a reader for the input that follows them.
type Message struct {
	Header Header
	Body   io.Reader
}
    46  
    47  // ReadMessage reads a message from r.
    48  // The headers are parsed, and the body of the message will be available
    49  // for reading from r.
    50  func ReadMessage(r io.Reader) (msg *Message, err error) {
    51  	tp := textproto.NewReader(bufio.NewReader(r))
    52  
    53  	hdr, err := tp.ReadMIMEHeader()
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  
    58  	return &Message{
    59  		Header: Header(hdr),
    60  		Body:   tp.R,
    61  	}, nil
    62  }
    63  
    64  // Layouts suitable for passing to time.Parse.
    65  // These are tried in order.
    66  var dateLayouts []string
    67  
    68  func init() {
    69  	// Generate layouts based on RFC 5322, section 3.3.
    70  
    71  	dows := [...]string{"", "Mon, "}   // day-of-week
    72  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
    73  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
    74  	seconds := [...]string{":05", ""}  // second
    75  	// "-0700 (MST)" is not in RFC 5322, but is common.
    76  	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
    77  
    78  	for _, dow := range dows {
    79  		for _, day := range days {
    80  			for _, year := range years {
    81  				for _, second := range seconds {
    82  					for _, zone := range zones {
    83  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    84  						dateLayouts = append(dateLayouts, s)
    85  					}
    86  				}
    87  			}
    88  		}
    89  	}
    90  }
    91  
    92  func parseDate(date string) (time.Time, error) {
    93  	for _, layout := range dateLayouts {
    94  		t, err := time.Parse(layout, date)
    95  		if err == nil {
    96  			return t, nil
    97  		}
    98  	}
    99  	return time.Time{}, errors.New("mail: header could not be parsed")
   100  }
   101  
   102  // A Header represents the key-value pairs in a mail message header.
   103  type Header map[string][]string
   104  
   105  // Get gets the first value associated with the given key.
   106  // If there are no values associated with the key, Get returns "".
   107  func (h Header) Get(key string) string {
   108  	return textproto.MIMEHeader(h).Get(key)
   109  }
   110  
// ErrHeaderNotPresent is the error returned by Header.Date and
// Header.AddressList when Get yields an empty string for the
// requested header field.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
   112  
   113  // Date parses the Date header field.
   114  func (h Header) Date() (time.Time, error) {
   115  	hdr := h.Get("Date")
   116  	if hdr == "" {
   117  		return time.Time{}, ErrHeaderNotPresent
   118  	}
   119  	return parseDate(hdr)
   120  }
   121  
   122  // AddressList parses the named header field as a list of addresses.
   123  func (h Header) AddressList(key string) ([]*Address, error) {
   124  	hdr := h.Get(key)
   125  	if hdr == "" {
   126  		return nil, ErrHeaderNotPresent
   127  	}
   128  	return ParseAddressList(hdr)
   129  }
   130  
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
// ParseAddress and ParseAddressList produce values of this type.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
   138  
   139  // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   140  func ParseAddress(address string) (*Address, error) {
   141  	return newAddrParser(address).parseAddress()
   142  }
   143  
   144  // ParseAddressList parses the given string as a list of addresses.
   145  func ParseAddressList(list string) ([]*Address, error) {
   146  	return newAddrParser(list).parseAddressList()
   147  }
   148  
   149  // String formats the address as a valid RFC 5322 address.
   150  // If the address's name contains non-ASCII characters
   151  // the name will be rendered according to RFC 2047.
   152  func (a *Address) String() string {
   153  	s := "<" + a.Address + ">"
   154  	if a.Name == "" {
   155  		return s
   156  	}
   157  	// If every character is printable ASCII, quoting is simple.
   158  	allPrintable := true
   159  	for i := 0; i < len(a.Name); i++ {
   160  		// isWSP here should actually be isFWS,
   161  		// but we don't support folding yet.
   162  		if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) {
   163  			allPrintable = false
   164  			break
   165  		}
   166  	}
   167  	if allPrintable {
   168  		b := bytes.NewBufferString(`"`)
   169  		for i := 0; i < len(a.Name); i++ {
   170  			if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) {
   171  				b.WriteByte('\\')
   172  			}
   173  			b.WriteByte(a.Name[i])
   174  		}
   175  		b.WriteString(`" `)
   176  		b.WriteString(s)
   177  		return b.String()
   178  	}
   179  
   180  	return mime.EncodeWord(a.Name) + " " + s
   181  }
   182  
   183  type addrParser []byte
   184  
   185  func newAddrParser(s string) *addrParser {
   186  	p := addrParser(s)
   187  	return &p
   188  }
   189  
   190  func (p *addrParser) parseAddressList() ([]*Address, error) {
   191  	var list []*Address
   192  	for {
   193  		p.skipSpace()
   194  		addr, err := p.parseAddress()
   195  		if err != nil {
   196  			return nil, err
   197  		}
   198  		list = append(list, addr)
   199  
   200  		p.skipSpace()
   201  		if p.empty() {
   202  			break
   203  		}
   204  		if !p.consume(',') {
   205  			return nil, errors.New("mail: expected comma")
   206  		}
   207  	}
   208  	return list, nil
   209  }
   210  
   211  // parseAddress parses a single RFC 5322 address at the start of p.
   212  func (p *addrParser) parseAddress() (addr *Address, err error) {
   213  	debug.Printf("parseAddress: %q", *p)
   214  	p.skipSpace()
   215  	if p.empty() {
   216  		return nil, errors.New("mail: no address")
   217  	}
   218  
   219  	// address = name-addr / addr-spec
   220  	// TODO(dsymonds): Support parsing group address.
   221  
   222  	// addr-spec has a more restricted grammar than name-addr,
   223  	// so try parsing it first, and fallback to name-addr.
   224  	// TODO(dsymonds): Is this really correct?
   225  	spec, err := p.consumeAddrSpec()
   226  	if err == nil {
   227  		return &Address{
   228  			Address: spec,
   229  		}, err
   230  	}
   231  	debug.Printf("parseAddress: not an addr-spec: %v", err)
   232  	debug.Printf("parseAddress: state is now %q", *p)
   233  
   234  	// display-name
   235  	var displayName string
   236  	if p.peek() != '<' {
   237  		displayName, err = p.consumePhrase()
   238  		if err != nil {
   239  			return nil, err
   240  		}
   241  	}
   242  	debug.Printf("parseAddress: displayName=%q", displayName)
   243  
   244  	// angle-addr = "<" addr-spec ">"
   245  	p.skipSpace()
   246  	if !p.consume('<') {
   247  		return nil, errors.New("mail: no angle-addr")
   248  	}
   249  	spec, err = p.consumeAddrSpec()
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  	if !p.consume('>') {
   254  		return nil, errors.New("mail: unclosed angle-addr")
   255  	}
   256  	debug.Printf("parseAddress: spec=%q", spec)
   257  
   258  	return &Address{
   259  		Name:    displayName,
   260  		Address: spec,
   261  	}, nil
   262  }
   263  
   264  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   265  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   266  	debug.Printf("consumeAddrSpec: %q", *p)
   267  
   268  	orig := *p
   269  	defer func() {
   270  		if err != nil {
   271  			*p = orig
   272  		}
   273  	}()
   274  
   275  	// local-part = dot-atom / quoted-string
   276  	var localPart string
   277  	p.skipSpace()
   278  	if p.empty() {
   279  		return "", errors.New("mail: no addr-spec")
   280  	}
   281  	if p.peek() == '"' {
   282  		// quoted-string
   283  		debug.Printf("consumeAddrSpec: parsing quoted-string")
   284  		localPart, err = p.consumeQuotedString()
   285  	} else {
   286  		// dot-atom
   287  		debug.Printf("consumeAddrSpec: parsing dot-atom")
   288  		localPart, err = p.consumeAtom(true)
   289  	}
   290  	if err != nil {
   291  		debug.Printf("consumeAddrSpec: failed: %v", err)
   292  		return "", err
   293  	}
   294  
   295  	if !p.consume('@') {
   296  		return "", errors.New("mail: missing @ in addr-spec")
   297  	}
   298  
   299  	// domain = dot-atom / domain-literal
   300  	var domain string
   301  	p.skipSpace()
   302  	if p.empty() {
   303  		return "", errors.New("mail: no domain in addr-spec")
   304  	}
   305  	// TODO(dsymonds): Handle domain-literal
   306  	domain, err = p.consumeAtom(true)
   307  	if err != nil {
   308  		return "", err
   309  	}
   310  
   311  	return localPart + "@" + domain, nil
   312  }
   313  
   314  // consumePhrase parses the RFC 5322 phrase at the start of p.
   315  func (p *addrParser) consumePhrase() (phrase string, err error) {
   316  	debug.Printf("consumePhrase: [%s]", *p)
   317  	// phrase = 1*word
   318  	var words []string
   319  	for {
   320  		// word = atom / quoted-string
   321  		var word string
   322  		p.skipSpace()
   323  		if p.empty() {
   324  			return "", errors.New("mail: missing phrase")
   325  		}
   326  		if p.peek() == '"' {
   327  			// quoted-string
   328  			word, err = p.consumeQuotedString()
   329  		} else {
   330  			// atom
   331  			// We actually parse dot-atom here to be more permissive
   332  			// than what RFC 5322 specifies.
   333  			word, err = p.consumeAtom(true)
   334  		}
   335  
   336  		// RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s.
   337  		if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 {
   338  			word, err = mime.DecodeWord(word)
   339  		}
   340  
   341  		if err != nil {
   342  			break
   343  		}
   344  		debug.Printf("consumePhrase: consumed %q", word)
   345  		words = append(words, word)
   346  	}
   347  	// Ignore any error if we got at least one word.
   348  	if err != nil && len(words) == 0 {
   349  		debug.Printf("consumePhrase: hit err: %v", err)
   350  		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
   351  	}
   352  	phrase = strings.Join(words, " ")
   353  	return phrase, nil
   354  }
   355  
   356  // consumeQuotedString parses the quoted string at the start of p.
   357  func (p *addrParser) consumeQuotedString() (qs string, err error) {
   358  	// Assume first byte is '"'.
   359  	i := 1
   360  	qsb := make([]byte, 0, 10)
   361  Loop:
   362  	for {
   363  		if i >= p.len() {
   364  			return "", errors.New("mail: unclosed quoted-string")
   365  		}
   366  		switch c := (*p)[i]; {
   367  		case c == '"':
   368  			break Loop
   369  		case c == '\\':
   370  			if i+1 == p.len() {
   371  				return "", errors.New("mail: unclosed quoted-string")
   372  			}
   373  			qsb = append(qsb, (*p)[i+1])
   374  			i += 2
   375  		case isQtext(c), c == ' ' || c == '\t':
   376  			// qtext (printable US-ASCII excluding " and \), or
   377  			// FWS (almost; we're ignoring CRLF)
   378  			qsb = append(qsb, c)
   379  			i++
   380  		default:
   381  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
   382  		}
   383  	}
   384  	*p = (*p)[i+1:]
   385  	return string(qsb), nil
   386  }
   387  
   388  // consumeAtom parses an RFC 5322 atom at the start of p.
   389  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   390  func (p *addrParser) consumeAtom(dot bool) (atom string, err error) {
   391  	if !isAtext(p.peek(), false) {
   392  		return "", errors.New("mail: invalid string")
   393  	}
   394  	i := 1
   395  	for ; i < p.len() && isAtext((*p)[i], dot); i++ {
   396  	}
   397  	atom, *p = string((*p)[:i]), (*p)[i:]
   398  	return atom, nil
   399  }
   400  
   401  func (p *addrParser) consume(c byte) bool {
   402  	if p.empty() || p.peek() != c {
   403  		return false
   404  	}
   405  	*p = (*p)[1:]
   406  	return true
   407  }
   408  
   409  // skipSpace skips the leading space and tab characters.
   410  func (p *addrParser) skipSpace() {
   411  	*p = bytes.TrimLeft(*p, " \t")
   412  }
   413  
   414  func (p *addrParser) peek() byte {
   415  	return (*p)[0]
   416  }
   417  
   418  func (p *addrParser) empty() bool {
   419  	return p.len() == 0
   420  }
   421  
   422  func (p *addrParser) len() int {
   423  	return len(*p)
   424  }
   425  
   426  var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
   427  	"abcdefghijklmnopqrstuvwxyz" +
   428  	"0123456789" +
   429  	"!#$%&'*+-/=?^_`{|}~")
   430  
   431  // isAtext reports whether c is an RFC 5322 atext character.
   432  // If dot is true, period is included.
   433  func isAtext(c byte, dot bool) bool {
   434  	if dot && c == '.' {
   435  		return true
   436  	}
   437  	return bytes.IndexByte(atextChars, c) >= 0
   438  }
   439  
   440  // isQtext reports whether c is an RFC 5322 qtext character.
   441  func isQtext(c byte) bool {
   442  	// Printable US-ASCII, excluding backslash or quote.
   443  	if c == '\\' || c == '"' {
   444  		return false
   445  	}
   446  	return '!' <= c && c <= '~'
   447  }
   448  
   449  // isVchar reports whether c is an RFC 5322 VCHAR character.
   450  func isVchar(c byte) bool {
   451  	// Visible (printing) characters.
   452  	return '!' <= c && c <= '~'
   453  }
   454  
   455  // isWSP reports whether c is a WSP (white space).
   456  // WSP is a space or horizontal tab (RFC5234 Appendix B).
   457  func isWSP(c byte) bool {
   458  	return c == ' ' || c == '\t'
   459  }