github.com/zebozhuang/go@v0.0.0-20200207033046-f8a98f6f5c5d/src/net/mail/message.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322 and
     9  extended by RFC 6532.
    10  Notable divergences:
    11  	* Obsolete address formats are not parsed, including addresses with
    12  	  embedded route information.
    13  	* Group addresses are not parsed.
    14  	* The full range of spacing (the CFWS syntax element) is not supported,
    15  	  such as breaking addresses across lines.
    16  	* No unicode normalization is performed.
    17  */
    18  package mail
    19  
    20  import (
    21  	"bufio"
    22  	"bytes"
    23  	"errors"
    24  	"fmt"
    25  	"io"
    26  	"log"
    27  	"mime"
    28  	"net/textproto"
    29  	"strings"
    30  	"time"
    31  	"unicode/utf8"
    32  )
    33  
    34  var debug = debugT(false)
    35  
    36  type debugT bool
    37  
    38  func (d debugT) Printf(format string, args ...interface{}) {
    39  	if d {
    40  		log.Printf(format, args...)
    41  	}
    42  }
    43  
// A Message represents a parsed mail message.
// ReadMessage fills in both fields: Header holds the parsed header block
// and Body is the reader from which the rest of the input can be read.
type Message struct {
	Header Header
	Body   io.Reader
}
    49  
    50  // ReadMessage reads a message from r.
    51  // The headers are parsed, and the body of the message will be available
    52  // for reading from msg.Body.
    53  func ReadMessage(r io.Reader) (msg *Message, err error) {
    54  	tp := textproto.NewReader(bufio.NewReader(r))
    55  
    56  	hdr, err := tp.ReadMIMEHeader()
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  
    61  	return &Message{
    62  		Header: Header(hdr),
    63  		Body:   tp.R,
    64  	}, nil
    65  }
    66  
    67  // Layouts suitable for passing to time.Parse.
    68  // These are tried in order.
    69  var dateLayouts []string
    70  
    71  func init() {
    72  	// Generate layouts based on RFC 5322, section 3.3.
    73  
    74  	dows := [...]string{"", "Mon, "}   // day-of-week
    75  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
    76  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
    77  	seconds := [...]string{":05", ""}  // second
    78  	// "-0700 (MST)" is not in RFC 5322, but is common.
    79  	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
    80  
    81  	for _, dow := range dows {
    82  		for _, day := range days {
    83  			for _, year := range years {
    84  				for _, second := range seconds {
    85  					for _, zone := range zones {
    86  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    87  						dateLayouts = append(dateLayouts, s)
    88  					}
    89  				}
    90  			}
    91  		}
    92  	}
    93  }
    94  
    95  // ParseDate parses an RFC 5322 date string.
    96  func ParseDate(date string) (time.Time, error) {
    97  	for _, layout := range dateLayouts {
    98  		t, err := time.Parse(layout, date)
    99  		if err == nil {
   100  			return t, nil
   101  		}
   102  	}
   103  	return time.Time{}, errors.New("mail: header could not be parsed")
   104  }
   105  
   106  // A Header represents the key-value pairs in a mail message header.
   107  type Header map[string][]string
   108  
   109  // Get gets the first value associated with the given key.
   110  // It is case insensitive; CanonicalMIMEHeaderKey is used
   111  // to canonicalize the provided key.
   112  // If there are no values associated with the key, Get returns "".
   113  // To access multiple values of a key, or to use non-canonical keys,
   114  // access the map directly.
   115  func (h Header) Get(key string) string {
   116  	return textproto.MIMEHeader(h).Get(key)
   117  }
   118  
// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when Get yields an empty string for the requested header field.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
   120  
   121  // Date parses the Date header field.
   122  func (h Header) Date() (time.Time, error) {
   123  	hdr := h.Get("Date")
   124  	if hdr == "" {
   125  		return time.Time{}, ErrHeaderNotPresent
   126  	}
   127  	return ParseDate(hdr)
   128  }
   129  
   130  // AddressList parses the named header field as a list of addresses.
   131  func (h Header) AddressList(key string) ([]*Address, error) {
   132  	hdr := h.Get(key)
   133  	if hdr == "" {
   134  		return nil, ErrHeaderNotPresent
   135  	}
   136  	return ParseAddressList(hdr)
   137  }
   138  
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
// The String method renders an Address back into RFC 5322 form.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
   146  
   147  // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   148  func ParseAddress(address string) (*Address, error) {
   149  	return (&addrParser{s: address}).parseSingleAddress()
   150  }
   151  
   152  // ParseAddressList parses the given string as a list of addresses.
   153  func ParseAddressList(list string) ([]*Address, error) {
   154  	return (&addrParser{s: list}).parseAddressList()
   155  }
   156  
// An AddressParser is an RFC 5322 address parser.
// Its Parse and ParseList methods behave like ParseAddress and
// ParseAddressList but hand WordDecoder to the underlying parser.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	// When it is nil, the package's default decoder is used instead.
	WordDecoder *mime.WordDecoder
}
   162  
   163  // Parse parses a single RFC 5322 address of the
   164  // form "Gogh Fir <gf@example.com>" or "foo@example.com".
   165  func (p *AddressParser) Parse(address string) (*Address, error) {
   166  	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
   167  }
   168  
   169  // ParseList parses the given string as a list of comma-separated addresses
   170  // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
   171  func (p *AddressParser) ParseList(list string) ([]*Address, error) {
   172  	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
   173  }
   174  
   175  // String formats the address as a valid RFC 5322 address.
   176  // If the address's name contains non-ASCII characters
   177  // the name will be rendered according to RFC 2047.
   178  func (a *Address) String() string {
   179  	// Format address local@domain
   180  	at := strings.LastIndex(a.Address, "@")
   181  	var local, domain string
   182  	if at < 0 {
   183  		// This is a malformed address ("@" is required in addr-spec);
   184  		// treat the whole address as local-part.
   185  		local = a.Address
   186  	} else {
   187  		local, domain = a.Address[:at], a.Address[at+1:]
   188  	}
   189  
   190  	// Add quotes if needed
   191  	quoteLocal := false
   192  	for i, r := range local {
   193  		if isAtext(r, false) {
   194  			continue
   195  		}
   196  		if r == '.' {
   197  			// Dots are okay if they are surrounded by atext.
   198  			// We only need to check that the previous byte is
   199  			// not a dot, and this isn't the end of the string.
   200  			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
   201  				continue
   202  			}
   203  		}
   204  		quoteLocal = true
   205  		break
   206  	}
   207  	if quoteLocal {
   208  		local = quoteString(local)
   209  
   210  	}
   211  
   212  	s := "<" + local + "@" + domain + ">"
   213  
   214  	if a.Name == "" {
   215  		return s
   216  	}
   217  
   218  	// If every character is printable ASCII, quoting is simple.
   219  	allPrintable := true
   220  	for _, r := range a.Name {
   221  		// isWSP here should actually be isFWS,
   222  		// but we don't support folding yet.
   223  		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
   224  			allPrintable = false
   225  			break
   226  		}
   227  	}
   228  	if allPrintable {
   229  		return quoteString(a.Name) + " " + s
   230  	}
   231  
   232  	// Text in an encoded-word in a display-name must not contain certain
   233  	// characters like quotes or parentheses (see RFC 2047 section 5.3).
   234  	// When this is the case encode the name using base64 encoding.
   235  	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
   236  		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
   237  	}
   238  	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
   239  }
   240  
// addrParser holds the state of an address parse in progress.
// The consume* and parse* methods advance it by re-slicing s.
type addrParser struct {
	s   string            // input that has not been consumed yet
	dec *mime.WordDecoder // may be nil
}
   245  
   246  func (p *addrParser) parseAddressList() ([]*Address, error) {
   247  	var list []*Address
   248  	for {
   249  		p.skipSpace()
   250  		addr, err := p.parseAddress()
   251  		if err != nil {
   252  			return nil, err
   253  		}
   254  		list = append(list, addr)
   255  
   256  		p.skipSpace()
   257  		if p.empty() {
   258  			break
   259  		}
   260  		if !p.consume(',') {
   261  			return nil, errors.New("mail: expected comma")
   262  		}
   263  	}
   264  	return list, nil
   265  }
   266  
   267  func (p *addrParser) parseSingleAddress() (*Address, error) {
   268  	addr, err := p.parseAddress()
   269  	if err != nil {
   270  		return nil, err
   271  	}
   272  	p.skipSpace()
   273  	if !p.empty() {
   274  		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
   275  	}
   276  	return addr, nil
   277  }
   278  
   279  // parseAddress parses a single RFC 5322 address at the start of p.
   280  func (p *addrParser) parseAddress() (addr *Address, err error) {
   281  	debug.Printf("parseAddress: %q", p.s)
   282  	p.skipSpace()
   283  	if p.empty() {
   284  		return nil, errors.New("mail: no address")
   285  	}
   286  
   287  	// address = name-addr / addr-spec
   288  	// TODO(dsymonds): Support parsing group address.
   289  
   290  	// addr-spec has a more restricted grammar than name-addr,
   291  	// so try parsing it first, and fallback to name-addr.
   292  	// TODO(dsymonds): Is this really correct?
   293  	spec, err := p.consumeAddrSpec()
   294  	if err == nil {
   295  		return &Address{
   296  			Address: spec,
   297  		}, err
   298  	}
   299  	debug.Printf("parseAddress: not an addr-spec: %v", err)
   300  	debug.Printf("parseAddress: state is now %q", p.s)
   301  
   302  	// display-name
   303  	var displayName string
   304  	if p.peek() != '<' {
   305  		displayName, err = p.consumePhrase()
   306  		if err != nil {
   307  			return nil, err
   308  		}
   309  	}
   310  	debug.Printf("parseAddress: displayName=%q", displayName)
   311  
   312  	// angle-addr = "<" addr-spec ">"
   313  	p.skipSpace()
   314  	if !p.consume('<') {
   315  		return nil, errors.New("mail: no angle-addr")
   316  	}
   317  	spec, err = p.consumeAddrSpec()
   318  	if err != nil {
   319  		return nil, err
   320  	}
   321  	if !p.consume('>') {
   322  		return nil, errors.New("mail: unclosed angle-addr")
   323  	}
   324  	debug.Printf("parseAddress: spec=%q", spec)
   325  
   326  	return &Address{
   327  		Name:    displayName,
   328  		Address: spec,
   329  	}, nil
   330  }
   331  
   332  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   333  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   334  	debug.Printf("consumeAddrSpec: %q", p.s)
   335  
   336  	orig := *p
   337  	defer func() {
   338  		if err != nil {
   339  			*p = orig
   340  		}
   341  	}()
   342  
   343  	// local-part = dot-atom / quoted-string
   344  	var localPart string
   345  	p.skipSpace()
   346  	if p.empty() {
   347  		return "", errors.New("mail: no addr-spec")
   348  	}
   349  	if p.peek() == '"' {
   350  		// quoted-string
   351  		debug.Printf("consumeAddrSpec: parsing quoted-string")
   352  		localPart, err = p.consumeQuotedString()
   353  		if localPart == "" {
   354  			err = errors.New("mail: empty quoted string in addr-spec")
   355  		}
   356  	} else {
   357  		// dot-atom
   358  		debug.Printf("consumeAddrSpec: parsing dot-atom")
   359  		localPart, err = p.consumeAtom(true, false)
   360  	}
   361  	if err != nil {
   362  		debug.Printf("consumeAddrSpec: failed: %v", err)
   363  		return "", err
   364  	}
   365  
   366  	if !p.consume('@') {
   367  		return "", errors.New("mail: missing @ in addr-spec")
   368  	}
   369  
   370  	// domain = dot-atom / domain-literal
   371  	var domain string
   372  	p.skipSpace()
   373  	if p.empty() {
   374  		return "", errors.New("mail: no domain in addr-spec")
   375  	}
   376  	// TODO(dsymonds): Handle domain-literal
   377  	domain, err = p.consumeAtom(true, false)
   378  	if err != nil {
   379  		return "", err
   380  	}
   381  
   382  	return localPart + "@" + domain, nil
   383  }
   384  
// consumePhrase parses the RFC 5322 phrase at the start of p and returns
// its words joined by single spaces.
//
// Words are read until the input is exhausted or a word fails to parse.
// A failure is reported only when no word was collected before it;
// otherwise the words gathered so far are returned with a nil error.
// If the input is already empty, the result is "" with a nil error.
func (p *addrParser) consumePhrase() (phrase string, err error) {
	debug.Printf("consumePhrase: [%s]", p.s)
	// phrase = 1*word
	var words []string
	// isPrevEncoded records whether the previously collected word was
	// reported as encoded by decodeRFC2047Word.
	var isPrevEncoded bool
	for {
		// word = atom / quoted-string
		var word string
		p.skipSpace()
		if p.empty() {
			break
		}
		isEncoded := false
		if p.peek() == '"' {
			// quoted-string
			word, err = p.consumeQuotedString()
		} else {
			// atom
			// We actually parse dot-atom here to be more permissive
			// than what RFC 5322 specifies.
			word, err = p.consumeAtom(true, true)
			if err == nil {
				word, isEncoded, err = p.decodeRFC2047Word(word)
			}
		}

		if err != nil {
			break
		}
		debug.Printf("consumePhrase: consumed %q", word)
		// Two consecutive encoded words are concatenated without a
		// separating space; every other word starts a new entry.
		if isPrevEncoded && isEncoded {
			words[len(words)-1] += word
		} else {
			words = append(words, word)
		}
		isPrevEncoded = isEncoded
	}
	// Ignore any error if we got at least one word.
	if err != nil && len(words) == 0 {
		debug.Printf("consumePhrase: hit err: %v", err)
		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
	}
	phrase = strings.Join(words, " ")
	return phrase, nil
}
   431  
// consumeQuotedString parses the quoted string at the start of p and
// returns its content with the surrounding quotes removed and each
// backslash escape replaced by the character it escapes.
//
// The caller must ensure that p.s begins with '"'. On success p is
// advanced past the closing quote; on error p is left unchanged.
func (p *addrParser) consumeQuotedString() (qs string, err error) {
	// Assume first byte is '"'.
	i := 1
	qsb := make([]rune, 0, 10)

	// escaped is true when the previous character was an unescaped backslash.
	escaped := false

Loop:
	for {
		r, size := utf8.DecodeRuneInString(p.s[i:])

		// The order of the cases matters: end of input and invalid UTF-8
		// are detected first, and a pending escape takes precedence over
		// the special meaning of '"' and '\\'.
		switch {
		case size == 0:
			// Ran out of input before the closing quote.
			return "", errors.New("mail: unclosed quoted-string")

		case size == 1 && r == utf8.RuneError:
			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)

		case escaped:
			//  quoted-pair = ("\" (VCHAR / WSP))

			if !isVchar(r) && !isWSP(r) {
				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
			}

			qsb = append(qsb, r)
			escaped = false

		case isQtext(r) || isWSP(r):
			// qtext (printable US-ASCII excluding " and \), or
			// FWS (almost; we're ignoring CRLF)
			qsb = append(qsb, r)

		case r == '"':
			break Loop

		case r == '\\':
			escaped = true

		default:
			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)

		}

		i += size
	}
	// i indexes the closing quote; skip past it.
	p.s = p.s[i+1:]
	return string(qsb), nil
}
   482  
   483  // consumeAtom parses an RFC 5322 atom at the start of p.
   484  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   485  // If permissive is true, consumeAtom will not fail on
   486  // leading/trailing/double dots in the atom (see golang.org/issue/4938).
   487  func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
   488  	i := 0
   489  
   490  Loop:
   491  	for {
   492  		r, size := utf8.DecodeRuneInString(p.s[i:])
   493  
   494  		switch {
   495  		case size == 1 && r == utf8.RuneError:
   496  			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
   497  
   498  		case size == 0 || !isAtext(r, dot):
   499  			break Loop
   500  
   501  		default:
   502  			i += size
   503  
   504  		}
   505  	}
   506  
   507  	if i == 0 {
   508  		return "", errors.New("mail: invalid string")
   509  	}
   510  	atom, p.s = p.s[:i], p.s[i:]
   511  	if !permissive {
   512  		if strings.HasPrefix(atom, ".") {
   513  			return "", errors.New("mail: leading dot in atom")
   514  		}
   515  		if strings.Contains(atom, "..") {
   516  			return "", errors.New("mail: double dot in atom")
   517  		}
   518  		if strings.HasSuffix(atom, ".") {
   519  			return "", errors.New("mail: trailing dot in atom")
   520  		}
   521  	}
   522  	return atom, nil
   523  }
   524  
   525  func (p *addrParser) consume(c byte) bool {
   526  	if p.empty() || p.peek() != c {
   527  		return false
   528  	}
   529  	p.s = p.s[1:]
   530  	return true
   531  }
   532  
   533  // skipSpace skips the leading space and tab characters.
   534  func (p *addrParser) skipSpace() {
   535  	p.s = strings.TrimLeft(p.s, " \t")
   536  }
   537  
// peek returns the first byte of the remaining input without consuming it.
// It indexes p.s directly, so callers must make sure the input is not
// empty before calling it.
func (p *addrParser) peek() byte {
	return p.s[0]
}
   541  
   542  func (p *addrParser) empty() bool {
   543  	return p.len() == 0
   544  }
   545  
// len returns the number of bytes of input that have not been consumed.
func (p *addrParser) len() int {
	return len(p.s)
}
   549  
   550  func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
   551  	if p.dec != nil {
   552  		word, err = p.dec.Decode(s)
   553  	} else {
   554  		word, err = rfc2047Decoder.Decode(s)
   555  	}
   556  
   557  	if err == nil {
   558  		return word, true, nil
   559  	}
   560  
   561  	if _, ok := err.(charsetError); ok {
   562  		return s, true, err
   563  	}
   564  
   565  	// Ignore invalid RFC 2047 encoded-word errors.
   566  	return s, false, nil
   567  }
   568  
// rfc2047Decoder is the decoder decodeRFC2047Word falls back to when the
// parser has no WordDecoder of its own. Its CharsetReader never produces a
// reader: it always fails with a charsetError naming the requested charset,
// which decodeRFC2047Word recognizes by type.
var rfc2047Decoder = mime.WordDecoder{
	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
		return nil, charsetError(charset)
	},
}
   574  
   575  type charsetError string
   576  
   577  func (e charsetError) Error() string {
   578  	return fmt.Sprintf("charset not supported: %q", string(e))
   579  }
   580  
   581  // isAtext reports whether r is an RFC 5322 atext character.
   582  // If dot is true, period is included.
   583  func isAtext(r rune, dot bool) bool {
   584  	switch r {
   585  	case '.':
   586  		return dot
   587  
   588  	case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
   589  		return false
   590  	}
   591  	return isVchar(r)
   592  }
   593  
   594  // isQtext reports whether r is an RFC 5322 qtext character.
   595  func isQtext(r rune) bool {
   596  	// Printable US-ASCII, excluding backslash or quote.
   597  	if r == '\\' || r == '"' {
   598  		return false
   599  	}
   600  	return isVchar(r)
   601  }
   602  
   603  // quoteString renders a string as an RFC 5322 quoted-string.
   604  func quoteString(s string) string {
   605  	var buf bytes.Buffer
   606  	buf.WriteByte('"')
   607  	for _, r := range s {
   608  		if isQtext(r) || isWSP(r) {
   609  			buf.WriteRune(r)
   610  		} else if isVchar(r) {
   611  			buf.WriteByte('\\')
   612  			buf.WriteRune(r)
   613  		}
   614  	}
   615  	buf.WriteByte('"')
   616  	return buf.String()
   617  }
   618  
   619  // isVchar reports whether r is an RFC 5322 VCHAR character.
   620  func isVchar(r rune) bool {
   621  	// Visible (printing) characters.
   622  	return '!' <= r && r <= '~' || isMultibyte(r)
   623  }
   624  
   625  // isMultibyte reports whether r is a multi-byte UTF-8 character
   626  // as supported by RFC 6532
   627  func isMultibyte(r rune) bool {
   628  	return r >= utf8.RuneSelf
   629  }
   630  
   631  // isWSP reports whether r is a WSP (white space).
   632  // WSP is a space or horizontal tab (RFC 5234 Appendix B).
   633  func isWSP(r rune) bool {
   634  	return r == ' ' || r == '\t'
   635  }