github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/net/mail/message.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322.
     9  Notable divergences:
    10  	* Obsolete address formats are not parsed, including addresses with
    11  	  embedded route information.
    12  	* Group addresses are not parsed.
    13  	* The full range of spacing (the CFWS syntax element) is not supported,
    14  	  such as breaking addresses across lines.
    15  */
    16  package mail
    17  
    18  import (
    19  	"bufio"
    20  	"bytes"
    21  	"encoding/base64"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"io/ioutil"
    26  	"log"
    27  	"net/textproto"
    28  	"strconv"
    29  	"strings"
    30  	"time"
    31  	"unicode"
    32  )
    33  
    34  var debug = debugT(false)
    35  
    36  type debugT bool
    37  
    38  func (d debugT) Printf(format string, args ...interface{}) {
    39  	if d {
    40  		log.Printf(format, args...)
    41  	}
    42  }
    43  
// A Message represents a parsed mail message.
type Message struct {
	Header Header    // header fields of the message
	Body   io.Reader // reader for the message content that follows the header
}
    49  
    50  // ReadMessage reads a message from r.
    51  // The headers are parsed, and the body of the message will be available
    52  // for reading from r.
    53  func ReadMessage(r io.Reader) (msg *Message, err error) {
    54  	tp := textproto.NewReader(bufio.NewReader(r))
    55  
    56  	hdr, err := tp.ReadMIMEHeader()
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  
    61  	return &Message{
    62  		Header: Header(hdr),
    63  		Body:   tp.R,
    64  	}, nil
    65  }
    66  
    67  // Layouts suitable for passing to time.Parse.
    68  // These are tried in order.
    69  var dateLayouts []string
    70  
    71  func init() {
    72  	// Generate layouts based on RFC 5322, section 3.3.
    73  
    74  	dows := [...]string{"", "Mon, "}   // day-of-week
    75  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
    76  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
    77  	seconds := [...]string{":05", ""}  // second
    78  	// "-0700 (MST)" is not in RFC 5322, but is common.
    79  	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
    80  
    81  	for _, dow := range dows {
    82  		for _, day := range days {
    83  			for _, year := range years {
    84  				for _, second := range seconds {
    85  					for _, zone := range zones {
    86  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    87  						dateLayouts = append(dateLayouts, s)
    88  					}
    89  				}
    90  			}
    91  		}
    92  	}
    93  }
    94  
    95  func parseDate(date string) (time.Time, error) {
    96  	for _, layout := range dateLayouts {
    97  		t, err := time.Parse(layout, date)
    98  		if err == nil {
    99  			return t, nil
   100  		}
   101  	}
   102  	return time.Time{}, errors.New("mail: header could not be parsed")
   103  }
   104  
   105  // A Header represents the key-value pairs in a mail message header.
   106  type Header map[string][]string
   107  
   108  // Get gets the first value associated with the given key.
   109  // If there are no values associated with the key, Get returns "".
   110  func (h Header) Get(key string) string {
   111  	return textproto.MIMEHeader(h).Get(key)
   112  }
   113  
// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when the requested header field has no value.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
   115  
   116  // Date parses the Date header field.
   117  func (h Header) Date() (time.Time, error) {
   118  	hdr := h.Get("Date")
   119  	if hdr == "" {
   120  		return time.Time{}, ErrHeaderNotPresent
   121  	}
   122  	return parseDate(hdr)
   123  }
   124  
   125  // AddressList parses the named header field as a list of addresses.
   126  func (h Header) AddressList(key string) ([]*Address, error) {
   127  	hdr := h.Get(key)
   128  	if hdr == "" {
   129  		return nil, ErrHeaderNotPresent
   130  	}
   131  	return ParseAddressList(hdr)
   132  }
   133  
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
// Name carries the display name and Address carries the addr-spec.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
   141  
   142  // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   143  func ParseAddress(address string) (*Address, error) {
   144  	return newAddrParser(address).parseAddress()
   145  }
   146  
   147  // ParseAddressList parses the given string as a list of addresses.
   148  func ParseAddressList(list string) ([]*Address, error) {
   149  	return newAddrParser(list).parseAddressList()
   150  }
   151  
   152  // String formats the address as a valid RFC 5322 address.
   153  // If the address's name contains non-ASCII characters
   154  // the name will be rendered according to RFC 2047.
   155  func (a *Address) String() string {
   156  	s := "<" + a.Address + ">"
   157  	if a.Name == "" {
   158  		return s
   159  	}
   160  	// If every character is printable ASCII, quoting is simple.
   161  	allPrintable := true
   162  	for i := 0; i < len(a.Name); i++ {
   163  		// isWSP here should actually be isFWS,
   164  		// but we don't support folding yet.
   165  		if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) {
   166  			allPrintable = false
   167  			break
   168  		}
   169  	}
   170  	if allPrintable {
   171  		b := bytes.NewBufferString(`"`)
   172  		for i := 0; i < len(a.Name); i++ {
   173  			if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) {
   174  				b.WriteByte('\\')
   175  			}
   176  			b.WriteByte(a.Name[i])
   177  		}
   178  		b.WriteString(`" `)
   179  		b.WriteString(s)
   180  		return b.String()
   181  	}
   182  
   183  	// UTF-8 "Q" encoding
   184  	b := bytes.NewBufferString("=?utf-8?q?")
   185  	for i := 0; i < len(a.Name); i++ {
   186  		switch c := a.Name[i]; {
   187  		case c == ' ':
   188  			b.WriteByte('_')
   189  		case isVchar(c) && c != '=' && c != '?' && c != '_':
   190  			b.WriteByte(c)
   191  		default:
   192  			fmt.Fprintf(b, "=%02X", c)
   193  		}
   194  	}
   195  	b.WriteString("?= ")
   196  	b.WriteString(s)
   197  	return b.String()
   198  }
   199  
   200  type addrParser []byte
   201  
   202  func newAddrParser(s string) *addrParser {
   203  	p := addrParser(s)
   204  	return &p
   205  }
   206  
   207  func (p *addrParser) parseAddressList() ([]*Address, error) {
   208  	var list []*Address
   209  	for {
   210  		p.skipSpace()
   211  		addr, err := p.parseAddress()
   212  		if err != nil {
   213  			return nil, err
   214  		}
   215  		list = append(list, addr)
   216  
   217  		p.skipSpace()
   218  		if p.empty() {
   219  			break
   220  		}
   221  		if !p.consume(',') {
   222  			return nil, errors.New("mail: expected comma")
   223  		}
   224  	}
   225  	return list, nil
   226  }
   227  
   228  // parseAddress parses a single RFC 5322 address at the start of p.
   229  func (p *addrParser) parseAddress() (addr *Address, err error) {
   230  	debug.Printf("parseAddress: %q", *p)
   231  	p.skipSpace()
   232  	if p.empty() {
   233  		return nil, errors.New("mail: no address")
   234  	}
   235  
   236  	// address = name-addr / addr-spec
   237  	// TODO(dsymonds): Support parsing group address.
   238  
   239  	// addr-spec has a more restricted grammar than name-addr,
   240  	// so try parsing it first, and fallback to name-addr.
   241  	// TODO(dsymonds): Is this really correct?
   242  	spec, err := p.consumeAddrSpec()
   243  	if err == nil {
   244  		return &Address{
   245  			Address: spec,
   246  		}, err
   247  	}
   248  	debug.Printf("parseAddress: not an addr-spec: %v", err)
   249  	debug.Printf("parseAddress: state is now %q", *p)
   250  
   251  	// display-name
   252  	var displayName string
   253  	if p.peek() != '<' {
   254  		displayName, err = p.consumePhrase()
   255  		if err != nil {
   256  			return nil, err
   257  		}
   258  	}
   259  	debug.Printf("parseAddress: displayName=%q", displayName)
   260  
   261  	// angle-addr = "<" addr-spec ">"
   262  	p.skipSpace()
   263  	if !p.consume('<') {
   264  		return nil, errors.New("mail: no angle-addr")
   265  	}
   266  	spec, err = p.consumeAddrSpec()
   267  	if err != nil {
   268  		return nil, err
   269  	}
   270  	if !p.consume('>') {
   271  		return nil, errors.New("mail: unclosed angle-addr")
   272  	}
   273  	debug.Printf("parseAddress: spec=%q", spec)
   274  
   275  	return &Address{
   276  		Name:    displayName,
   277  		Address: spec,
   278  	}, nil
   279  }
   280  
// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
// On success it returns "local-part@domain" and leaves p positioned after
// the consumed bytes. On any error p is restored to the state it had when
// the function was entered.
func (p *addrParser) consumeAddrSpec() (spec string, err error) {
	debug.Printf("consumeAddrSpec: %q", *p)

	// Snapshot the parser so that a failed parse leaves p untouched. The
	// deferred function inspects the named result err, so it sees the
	// error of whichever return statement is taken.
	orig := *p
	defer func() {
		if err != nil {
			*p = orig
		}
	}()

	// local-part = dot-atom / quoted-string
	var localPart string
	p.skipSpace()
	if p.empty() {
		return "", errors.New("mail: no addr-spec")
	}
	if p.peek() == '"' {
		// quoted-string
		debug.Printf("consumeAddrSpec: parsing quoted-string")
		localPart, err = p.consumeQuotedString()
	} else {
		// dot-atom
		debug.Printf("consumeAddrSpec: parsing dot-atom")
		localPart, err = p.consumeAtom(true)
	}
	if err != nil {
		debug.Printf("consumeAddrSpec: failed: %v", err)
		return "", err
	}

	// The '@' must follow the local part immediately; no space is skipped
	// before it.
	if !p.consume('@') {
		return "", errors.New("mail: missing @ in addr-spec")
	}

	// domain = dot-atom / domain-literal
	var domain string
	p.skipSpace()
	if p.empty() {
		return "", errors.New("mail: no domain in addr-spec")
	}
	// TODO(dsymonds): Handle domain-literal
	domain, err = p.consumeAtom(true)
	if err != nil {
		return "", err
	}

	return localPart + "@" + domain, nil
}
   330  
   331  // consumePhrase parses the RFC 5322 phrase at the start of p.
   332  func (p *addrParser) consumePhrase() (phrase string, err error) {
   333  	debug.Printf("consumePhrase: [%s]", *p)
   334  	// phrase = 1*word
   335  	var words []string
   336  	for {
   337  		// word = atom / quoted-string
   338  		var word string
   339  		p.skipSpace()
   340  		if p.empty() {
   341  			return "", errors.New("mail: missing phrase")
   342  		}
   343  		if p.peek() == '"' {
   344  			// quoted-string
   345  			word, err = p.consumeQuotedString()
   346  		} else {
   347  			// atom
   348  			// We actually parse dot-atom here to be more permissive
   349  			// than what RFC 5322 specifies.
   350  			word, err = p.consumeAtom(true)
   351  		}
   352  
   353  		// RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s.
   354  		if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 {
   355  			word, err = decodeRFC2047Word(word)
   356  		}
   357  
   358  		if err != nil {
   359  			break
   360  		}
   361  		debug.Printf("consumePhrase: consumed %q", word)
   362  		words = append(words, word)
   363  	}
   364  	// Ignore any error if we got at least one word.
   365  	if err != nil && len(words) == 0 {
   366  		debug.Printf("consumePhrase: hit err: %v", err)
   367  		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
   368  	}
   369  	phrase = strings.Join(words, " ")
   370  	return phrase, nil
   371  }
   372  
   373  // consumeQuotedString parses the quoted string at the start of p.
   374  func (p *addrParser) consumeQuotedString() (qs string, err error) {
   375  	// Assume first byte is '"'.
   376  	i := 1
   377  	qsb := make([]byte, 0, 10)
   378  Loop:
   379  	for {
   380  		if i >= p.len() {
   381  			return "", errors.New("mail: unclosed quoted-string")
   382  		}
   383  		switch c := (*p)[i]; {
   384  		case c == '"':
   385  			break Loop
   386  		case c == '\\':
   387  			if i+1 == p.len() {
   388  				return "", errors.New("mail: unclosed quoted-string")
   389  			}
   390  			qsb = append(qsb, (*p)[i+1])
   391  			i += 2
   392  		case isQtext(c), c == ' ' || c == '\t':
   393  			// qtext (printable US-ASCII excluding " and \), or
   394  			// FWS (almost; we're ignoring CRLF)
   395  			qsb = append(qsb, c)
   396  			i++
   397  		default:
   398  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
   399  		}
   400  	}
   401  	*p = (*p)[i+1:]
   402  	return string(qsb), nil
   403  }
   404  
   405  // consumeAtom parses an RFC 5322 atom at the start of p.
   406  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   407  func (p *addrParser) consumeAtom(dot bool) (atom string, err error) {
   408  	if !isAtext(p.peek(), false) {
   409  		return "", errors.New("mail: invalid string")
   410  	}
   411  	i := 1
   412  	for ; i < p.len() && isAtext((*p)[i], dot); i++ {
   413  	}
   414  	atom, *p = string((*p)[:i]), (*p)[i:]
   415  	return atom, nil
   416  }
   417  
   418  func (p *addrParser) consume(c byte) bool {
   419  	if p.empty() || p.peek() != c {
   420  		return false
   421  	}
   422  	*p = (*p)[1:]
   423  	return true
   424  }
   425  
   426  // skipSpace skips the leading space and tab characters.
   427  func (p *addrParser) skipSpace() {
   428  	*p = bytes.TrimLeft(*p, " \t")
   429  }
   430  
   431  func (p *addrParser) peek() byte {
   432  	return (*p)[0]
   433  }
   434  
   435  func (p *addrParser) empty() bool {
   436  	return p.len() == 0
   437  }
   438  
   439  func (p *addrParser) len() int {
   440  	return len(*p)
   441  }
   442  
   443  func decodeRFC2047Word(s string) (string, error) {
   444  	fields := strings.Split(s, "?")
   445  	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" {
   446  		return "", errors.New("address not RFC 2047 encoded")
   447  	}
   448  	charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2])
   449  	if charset != "us-ascii" && charset != "iso-8859-1" && charset != "utf-8" {
   450  		return "", fmt.Errorf("charset not supported: %q", charset)
   451  	}
   452  
   453  	in := bytes.NewBufferString(fields[3])
   454  	var r io.Reader
   455  	switch enc {
   456  	case "b":
   457  		r = base64.NewDecoder(base64.StdEncoding, in)
   458  	case "q":
   459  		r = qDecoder{r: in}
   460  	default:
   461  		return "", fmt.Errorf("RFC 2047 encoding not supported: %q", enc)
   462  	}
   463  
   464  	dec, err := ioutil.ReadAll(r)
   465  	if err != nil {
   466  		return "", err
   467  	}
   468  
   469  	switch charset {
   470  	case "us-ascii":
   471  		b := new(bytes.Buffer)
   472  		for _, c := range dec {
   473  			if c >= 0x80 {
   474  				b.WriteRune(unicode.ReplacementChar)
   475  			} else {
   476  				b.WriteRune(rune(c))
   477  			}
   478  		}
   479  		return b.String(), nil
   480  	case "iso-8859-1":
   481  		b := new(bytes.Buffer)
   482  		for _, c := range dec {
   483  			b.WriteRune(rune(c))
   484  		}
   485  		return b.String(), nil
   486  	case "utf-8":
   487  		return string(dec), nil
   488  	}
   489  	panic("unreachable")
   490  }
   491  
   492  type qDecoder struct {
   493  	r       io.Reader
   494  	scratch [2]byte
   495  }
   496  
   497  func (qd qDecoder) Read(p []byte) (n int, err error) {
   498  	// This method writes at most one byte into p.
   499  	if len(p) == 0 {
   500  		return 0, nil
   501  	}
   502  	if _, err := qd.r.Read(qd.scratch[:1]); err != nil {
   503  		return 0, err
   504  	}
   505  	switch c := qd.scratch[0]; {
   506  	case c == '=':
   507  		if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil {
   508  			return 0, err
   509  		}
   510  		x, err := strconv.ParseInt(string(qd.scratch[:2]), 16, 64)
   511  		if err != nil {
   512  			return 0, fmt.Errorf("mail: invalid RFC 2047 encoding: %q", qd.scratch[:2])
   513  		}
   514  		p[0] = byte(x)
   515  	case c == '_':
   516  		p[0] = ' '
   517  	default:
   518  		p[0] = c
   519  	}
   520  	return 1, nil
   521  }
   522  
   523  var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
   524  	"abcdefghijklmnopqrstuvwxyz" +
   525  	"0123456789" +
   526  	"!#$%&'*+-/=?^_`{|}~")
   527  
   528  // isAtext returns true if c is an RFC 5322 atext character.
   529  // If dot is true, period is included.
   530  func isAtext(c byte, dot bool) bool {
   531  	if dot && c == '.' {
   532  		return true
   533  	}
   534  	return bytes.IndexByte(atextChars, c) >= 0
   535  }
   536  
   537  // isQtext returns true if c is an RFC 5322 qtext character.
   538  func isQtext(c byte) bool {
   539  	// Printable US-ASCII, excluding backslash or quote.
   540  	if c == '\\' || c == '"' {
   541  		return false
   542  	}
   543  	return '!' <= c && c <= '~'
   544  }
   545  
   546  // isVchar returns true if c is an RFC 5322 VCHAR character.
   547  func isVchar(c byte) bool {
   548  	// Visible (printing) characters.
   549  	return '!' <= c && c <= '~'
   550  }
   551  
   552  // isWSP returns true if c is a WSP (white space).
   553  // WSP is a space or horizontal tab (RFC5234 Appendix B).
   554  func isWSP(c byte) bool {
   555  	return c == ' ' || c == '\t'
   556  }