github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/net/mail/message.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322 and
     9  extended by RFC 6532.
    10  Notable divergences:
    11  	* Obsolete address formats are not parsed, including addresses with
    12  	  embedded route information.
    13  	* Group addresses are not parsed.
    14  	* The full range of spacing (the CFWS syntax element) is not supported,
    15  	  such as breaking addresses across lines.
    16  	* No unicode normalization is performed.
    17  	* Addresses with some RFC 5322 3.2.3 specials without quotes are parsed.
    18  */
    19  package mail
    20  
    21  import (
    22  	"bufio"
    23  	"bytes"
    24  	"errors"
    25  	"fmt"
    26  	"io"
    27  	"log"
    28  	"mime"
    29  	"net/textproto"
    30  	"strings"
    31  	"time"
    32  	"unicode/utf8"
    33  )
    34  
    35  var debug = debugT(false)
    36  
    37  type debugT bool
    38  
    39  func (d debugT) Printf(format string, args ...interface{}) {
    40  	if d {
    41  		log.Printf(format, args...)
    42  	}
    43  }
    44  
    45  // A Message represents a parsed mail message.
    46  type Message struct {
    47  	Header Header
    48  	Body   io.Reader
    49  }
    50  
    51  // ReadMessage reads a message from r.
    52  // The headers are parsed, and the body of the message will be available
    53  // for reading from msg.Body.
    54  func ReadMessage(r io.Reader) (msg *Message, err error) {
    55  	tp := textproto.NewReader(bufio.NewReader(r))
    56  
    57  	hdr, err := tp.ReadMIMEHeader()
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  
    62  	return &Message{
    63  		Header: Header(hdr),
    64  		Body:   tp.R,
    65  	}, nil
    66  }
    67  
    68  // Layouts suitable for passing to time.Parse.
    69  // These are tried in order.
    70  var dateLayouts []string
    71  
    72  func init() {
    73  	// Generate layouts based on RFC 5322, section 3.3.
    74  
    75  	dows := [...]string{"", "Mon, "}   // day-of-week
    76  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
    77  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
    78  	seconds := [...]string{":05", ""}  // second
    79  	// "-0700 (MST)" is not in RFC 5322, but is common.
    80  	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
    81  
    82  	for _, dow := range dows {
    83  		for _, day := range days {
    84  			for _, year := range years {
    85  				for _, second := range seconds {
    86  					for _, zone := range zones {
    87  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    88  						dateLayouts = append(dateLayouts, s)
    89  					}
    90  				}
    91  			}
    92  		}
    93  	}
    94  }
    95  
    96  // ParseDate parses an RFC 5322 date string.
    97  func ParseDate(date string) (time.Time, error) {
    98  	for _, layout := range dateLayouts {
    99  		t, err := time.Parse(layout, date)
   100  		if err == nil {
   101  			return t, nil
   102  		}
   103  	}
   104  	return time.Time{}, errors.New("mail: header could not be parsed")
   105  }
   106  
   107  // A Header represents the key-value pairs in a mail message header.
   108  type Header map[string][]string
   109  
   110  // Get gets the first value associated with the given key.
   111  // It is case insensitive; CanonicalMIMEHeaderKey is used
   112  // to canonicalize the provided key.
   113  // If there are no values associated with the key, Get returns "".
   114  // To access multiple values of a key, or to use non-canonical keys,
   115  // access the map directly.
   116  func (h Header) Get(key string) string {
   117  	return textproto.MIMEHeader(h).Get(key)
   118  }
   119  
   120  var ErrHeaderNotPresent = errors.New("mail: header not in message")
   121  
   122  // Date parses the Date header field.
   123  func (h Header) Date() (time.Time, error) {
   124  	hdr := h.Get("Date")
   125  	if hdr == "" {
   126  		return time.Time{}, ErrHeaderNotPresent
   127  	}
   128  	return ParseDate(hdr)
   129  }
   130  
   131  // AddressList parses the named header field as a list of addresses.
   132  func (h Header) AddressList(key string) ([]*Address, error) {
   133  	hdr := h.Get(key)
   134  	if hdr == "" {
   135  		return nil, ErrHeaderNotPresent
   136  	}
   137  	return ParseAddressList(hdr)
   138  }
   139  
   140  // Address represents a single mail address.
   141  // An address such as "Barry Gibbs <bg@example.com>" is represented
   142  // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
   143  type Address struct {
   144  	Name    string // Proper name; may be empty.
   145  	Address string // user@domain
   146  }
   147  
   148  // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   149  func ParseAddress(address string) (*Address, error) {
   150  	return (&addrParser{s: address}).parseSingleAddress()
   151  }
   152  
   153  // ParseAddressList parses the given string as a list of addresses.
   154  func ParseAddressList(list string) ([]*Address, error) {
   155  	return (&addrParser{s: list}).parseAddressList()
   156  }
   157  
   158  // An AddressParser is an RFC 5322 address parser.
   159  type AddressParser struct {
   160  	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
   161  	WordDecoder *mime.WordDecoder
   162  }
   163  
   164  // Parse parses a single RFC 5322 address of the
   165  // form "Gogh Fir <gf@example.com>" or "foo@example.com".
   166  func (p *AddressParser) Parse(address string) (*Address, error) {
   167  	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
   168  }
   169  
   170  // ParseList parses the given string as a list of comma-separated addresses
   171  // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
   172  func (p *AddressParser) ParseList(list string) ([]*Address, error) {
   173  	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
   174  }
   175  
   176  // String formats the address as a valid RFC 5322 address.
   177  // If the address's name contains non-ASCII characters
   178  // the name will be rendered according to RFC 2047.
   179  func (a *Address) String() string {
   180  	// Format address local@domain
   181  	at := strings.LastIndex(a.Address, "@")
   182  	var local, domain string
   183  	if at < 0 {
   184  		// This is a malformed address ("@" is required in addr-spec);
   185  		// treat the whole address as local-part.
   186  		local = a.Address
   187  	} else {
   188  		local, domain = a.Address[:at], a.Address[at+1:]
   189  	}
   190  
   191  	// Add quotes if needed
   192  	quoteLocal := false
   193  	for i, r := range local {
   194  		if isAtext(r, false, false) {
   195  			continue
   196  		}
   197  		if r == '.' {
   198  			// Dots are okay if they are surrounded by atext.
   199  			// We only need to check that the previous byte is
   200  			// not a dot, and this isn't the end of the string.
   201  			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
   202  				continue
   203  			}
   204  		}
   205  		quoteLocal = true
   206  		break
   207  	}
   208  	if quoteLocal {
   209  		local = quoteString(local)
   210  
   211  	}
   212  
   213  	s := "<" + local + "@" + domain + ">"
   214  
   215  	if a.Name == "" {
   216  		return s
   217  	}
   218  
   219  	// If every character is printable ASCII, quoting is simple.
   220  	allPrintable := true
   221  	for _, r := range a.Name {
   222  		// isWSP here should actually be isFWS,
   223  		// but we don't support folding yet.
   224  		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
   225  			allPrintable = false
   226  			break
   227  		}
   228  	}
   229  	if allPrintable {
   230  		return quoteString(a.Name) + " " + s
   231  	}
   232  
   233  	// Text in an encoded-word in a display-name must not contain certain
   234  	// characters like quotes or parentheses (see RFC 2047 section 5.3).
   235  	// When this is the case encode the name using base64 encoding.
   236  	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
   237  		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
   238  	}
   239  	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
   240  }
   241  
   242  type addrParser struct {
   243  	s   string
   244  	dec *mime.WordDecoder // may be nil
   245  }
   246  
   247  func (p *addrParser) parseAddressList() ([]*Address, error) {
   248  	var list []*Address
   249  	for {
   250  		p.skipSpace()
   251  		addr, err := p.parseAddress()
   252  		if err != nil {
   253  			return nil, err
   254  		}
   255  		list = append(list, addr)
   256  
   257  		p.skipSpace()
   258  		if p.empty() {
   259  			break
   260  		}
   261  		if !p.consume(',') {
   262  			return nil, errors.New("mail: expected comma")
   263  		}
   264  	}
   265  	return list, nil
   266  }
   267  
   268  func (p *addrParser) parseSingleAddress() (*Address, error) {
   269  	addr, err := p.parseAddress()
   270  	if err != nil {
   271  		return nil, err
   272  	}
   273  	p.skipSpace()
   274  	if !p.empty() {
   275  		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
   276  	}
   277  	return addr, nil
   278  }
   279  
   280  // parseAddress parses a single RFC 5322 address at the start of p.
   281  func (p *addrParser) parseAddress() (addr *Address, err error) {
   282  	debug.Printf("parseAddress: %q", p.s)
   283  	p.skipSpace()
   284  	if p.empty() {
   285  		return nil, errors.New("mail: no address")
   286  	}
   287  
   288  	// address = name-addr / addr-spec
   289  	// TODO(dsymonds): Support parsing group address.
   290  
   291  	// addr-spec has a more restricted grammar than name-addr,
   292  	// so try parsing it first, and fallback to name-addr.
   293  	// TODO(dsymonds): Is this really correct?
   294  	spec, err := p.consumeAddrSpec()
   295  	if err == nil {
   296  		return &Address{
   297  			Address: spec,
   298  		}, err
   299  	}
   300  	debug.Printf("parseAddress: not an addr-spec: %v", err)
   301  	debug.Printf("parseAddress: state is now %q", p.s)
   302  
   303  	// display-name
   304  	var displayName string
   305  	if p.peek() != '<' {
   306  		displayName, err = p.consumePhrase()
   307  		if err != nil {
   308  			return nil, err
   309  		}
   310  	}
   311  	debug.Printf("parseAddress: displayName=%q", displayName)
   312  
   313  	// angle-addr = "<" addr-spec ">"
   314  	p.skipSpace()
   315  	if !p.consume('<') {
   316  		return nil, errors.New("mail: no angle-addr")
   317  	}
   318  	spec, err = p.consumeAddrSpec()
   319  	if err != nil {
   320  		return nil, err
   321  	}
   322  	if !p.consume('>') {
   323  		return nil, errors.New("mail: unclosed angle-addr")
   324  	}
   325  	debug.Printf("parseAddress: spec=%q", spec)
   326  
   327  	return &Address{
   328  		Name:    displayName,
   329  		Address: spec,
   330  	}, nil
   331  }
   332  
   333  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   334  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   335  	debug.Printf("consumeAddrSpec: %q", p.s)
   336  
   337  	orig := *p
   338  	defer func() {
   339  		if err != nil {
   340  			*p = orig
   341  		}
   342  	}()
   343  
   344  	// local-part = dot-atom / quoted-string
   345  	var localPart string
   346  	p.skipSpace()
   347  	if p.empty() {
   348  		return "", errors.New("mail: no addr-spec")
   349  	}
   350  	if p.peek() == '"' {
   351  		// quoted-string
   352  		debug.Printf("consumeAddrSpec: parsing quoted-string")
   353  		localPart, err = p.consumeQuotedString()
   354  		if localPart == "" {
   355  			err = errors.New("mail: empty quoted string in addr-spec")
   356  		}
   357  	} else {
   358  		// dot-atom
   359  		debug.Printf("consumeAddrSpec: parsing dot-atom")
   360  		localPart, err = p.consumeAtom(true, false)
   361  	}
   362  	if err != nil {
   363  		debug.Printf("consumeAddrSpec: failed: %v", err)
   364  		return "", err
   365  	}
   366  
   367  	if !p.consume('@') {
   368  		return "", errors.New("mail: missing @ in addr-spec")
   369  	}
   370  
   371  	// domain = dot-atom / domain-literal
   372  	var domain string
   373  	p.skipSpace()
   374  	if p.empty() {
   375  		return "", errors.New("mail: no domain in addr-spec")
   376  	}
   377  	// TODO(dsymonds): Handle domain-literal
   378  	domain, err = p.consumeAtom(true, false)
   379  	if err != nil {
   380  		return "", err
   381  	}
   382  
   383  	return localPart + "@" + domain, nil
   384  }
   385  
   386  // consumePhrase parses the RFC 5322 phrase at the start of p.
   387  func (p *addrParser) consumePhrase() (phrase string, err error) {
   388  	debug.Printf("consumePhrase: [%s]", p.s)
   389  	// phrase = 1*word
   390  	var words []string
   391  	var isPrevEncoded bool
   392  	for {
   393  		// word = atom / quoted-string
   394  		var word string
   395  		p.skipSpace()
   396  		if p.empty() {
   397  			break
   398  		}
   399  		isEncoded := false
   400  		if p.peek() == '"' {
   401  			// quoted-string
   402  			word, err = p.consumeQuotedString()
   403  		} else {
   404  			// atom
   405  			// We actually parse dot-atom here to be more permissive
   406  			// than what RFC 5322 specifies.
   407  			word, err = p.consumeAtom(true, true)
   408  			if err == nil {
   409  				word, isEncoded, err = p.decodeRFC2047Word(word)
   410  			}
   411  		}
   412  
   413  		if err != nil {
   414  			break
   415  		}
   416  		debug.Printf("consumePhrase: consumed %q", word)
   417  		if isPrevEncoded && isEncoded {
   418  			words[len(words)-1] += word
   419  		} else {
   420  			words = append(words, word)
   421  		}
   422  		isPrevEncoded = isEncoded
   423  	}
   424  	// Ignore any error if we got at least one word.
   425  	if err != nil && len(words) == 0 {
   426  		debug.Printf("consumePhrase: hit err: %v", err)
   427  		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
   428  	}
   429  	phrase = strings.Join(words, " ")
   430  	return phrase, nil
   431  }
   432  
   433  // consumeQuotedString parses the quoted string at the start of p.
   434  func (p *addrParser) consumeQuotedString() (qs string, err error) {
   435  	// Assume first byte is '"'.
   436  	i := 1
   437  	qsb := make([]rune, 0, 10)
   438  
   439  	escaped := false
   440  
   441  Loop:
   442  	for {
   443  		r, size := utf8.DecodeRuneInString(p.s[i:])
   444  
   445  		switch {
   446  		case size == 0:
   447  			return "", errors.New("mail: unclosed quoted-string")
   448  
   449  		case size == 1 && r == utf8.RuneError:
   450  			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
   451  
   452  		case escaped:
   453  			//  quoted-pair = ("\" (VCHAR / WSP))
   454  
   455  			if !isVchar(r) && !isWSP(r) {
   456  				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
   457  			}
   458  
   459  			qsb = append(qsb, r)
   460  			escaped = false
   461  
   462  		case isQtext(r) || isWSP(r):
   463  			// qtext (printable US-ASCII excluding " and \), or
   464  			// FWS (almost; we're ignoring CRLF)
   465  			qsb = append(qsb, r)
   466  
   467  		case r == '"':
   468  			break Loop
   469  
   470  		case r == '\\':
   471  			escaped = true
   472  
   473  		default:
   474  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
   475  
   476  		}
   477  
   478  		i += size
   479  	}
   480  	p.s = p.s[i+1:]
   481  	return string(qsb), nil
   482  }
   483  
   484  // consumeAtom parses an RFC 5322 atom at the start of p.
   485  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   486  // If permissive is true, consumeAtom will not fail on:
   487  // - leading/trailing/double dots in the atom (see golang.org/issue/4938)
   488  // - special characters (RFC 5322 3.2.3) except '<', '>' and '"' (see golang.org/issue/21018)
   489  func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
   490  	i := 0
   491  
   492  Loop:
   493  	for {
   494  		r, size := utf8.DecodeRuneInString(p.s[i:])
   495  		switch {
   496  		case size == 1 && r == utf8.RuneError:
   497  			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
   498  
   499  		case size == 0 || !isAtext(r, dot, permissive):
   500  			break Loop
   501  
   502  		default:
   503  			i += size
   504  
   505  		}
   506  	}
   507  
   508  	if i == 0 {
   509  		return "", errors.New("mail: invalid string")
   510  	}
   511  	atom, p.s = p.s[:i], p.s[i:]
   512  	if !permissive {
   513  		if strings.HasPrefix(atom, ".") {
   514  			return "", errors.New("mail: leading dot in atom")
   515  		}
   516  		if strings.Contains(atom, "..") {
   517  			return "", errors.New("mail: double dot in atom")
   518  		}
   519  		if strings.HasSuffix(atom, ".") {
   520  			return "", errors.New("mail: trailing dot in atom")
   521  		}
   522  	}
   523  	return atom, nil
   524  }
   525  
   526  func (p *addrParser) consume(c byte) bool {
   527  	if p.empty() || p.peek() != c {
   528  		return false
   529  	}
   530  	p.s = p.s[1:]
   531  	return true
   532  }
   533  
   534  // skipSpace skips the leading space and tab characters.
   535  func (p *addrParser) skipSpace() {
   536  	p.s = strings.TrimLeft(p.s, " \t")
   537  }
   538  
   539  func (p *addrParser) peek() byte {
   540  	return p.s[0]
   541  }
   542  
   543  func (p *addrParser) empty() bool {
   544  	return p.len() == 0
   545  }
   546  
   547  func (p *addrParser) len() int {
   548  	return len(p.s)
   549  }
   550  
   551  func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
   552  	if p.dec != nil {
   553  		word, err = p.dec.Decode(s)
   554  	} else {
   555  		word, err = rfc2047Decoder.Decode(s)
   556  	}
   557  
   558  	if err == nil {
   559  		return word, true, nil
   560  	}
   561  
   562  	if _, ok := err.(charsetError); ok {
   563  		return s, true, err
   564  	}
   565  
   566  	// Ignore invalid RFC 2047 encoded-word errors.
   567  	return s, false, nil
   568  }
   569  
   570  var rfc2047Decoder = mime.WordDecoder{
   571  	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
   572  		return nil, charsetError(charset)
   573  	},
   574  }
   575  
   576  type charsetError string
   577  
   578  func (e charsetError) Error() string {
   579  	return fmt.Sprintf("charset not supported: %q", string(e))
   580  }
   581  
   582  // isAtext reports whether r is an RFC 5322 atext character.
   583  // If dot is true, period is included.
   584  // If permissive is true, RFC 5322 3.2.3 specials is included,
   585  // except '<', '>' and '"'.
   586  func isAtext(r rune, dot, permissive bool) bool {
   587  	switch r {
   588  	case '.':
   589  		return dot
   590  
   591  	// RFC 5322 3.2.3. specials
   592  	case '(', ')', '[', ']', ':', ';', '@', '\\', ',':
   593  		return permissive
   594  
   595  	case '<', '>', '"':
   596  		return false
   597  	}
   598  	return isVchar(r)
   599  }
   600  
   601  // isQtext reports whether r is an RFC 5322 qtext character.
   602  func isQtext(r rune) bool {
   603  	// Printable US-ASCII, excluding backslash or quote.
   604  	if r == '\\' || r == '"' {
   605  		return false
   606  	}
   607  	return isVchar(r)
   608  }
   609  
   610  // quoteString renders a string as an RFC 5322 quoted-string.
   611  func quoteString(s string) string {
   612  	var buf bytes.Buffer
   613  	buf.WriteByte('"')
   614  	for _, r := range s {
   615  		if isQtext(r) || isWSP(r) {
   616  			buf.WriteRune(r)
   617  		} else if isVchar(r) {
   618  			buf.WriteByte('\\')
   619  			buf.WriteRune(r)
   620  		}
   621  	}
   622  	buf.WriteByte('"')
   623  	return buf.String()
   624  }
   625  
   626  // isVchar reports whether r is an RFC 5322 VCHAR character.
   627  func isVchar(r rune) bool {
   628  	// Visible (printing) characters.
   629  	return '!' <= r && r <= '~' || isMultibyte(r)
   630  }
   631  
   632  // isMultibyte reports whether r is a multi-byte UTF-8 character
   633  // as supported by RFC 6532
   634  func isMultibyte(r rune) bool {
   635  	return r >= utf8.RuneSelf
   636  }
   637  
   638  // isWSP reports whether r is a WSP (white space).
   639  // WSP is a space or horizontal tab (RFC 5234 Appendix B).
   640  func isWSP(r rune) bool {
   641  	return r == ' ' || r == '\t'
   642  }