github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/net/mail/message.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package mail implements parsing of mail messages.
     7  
     8  For the most part, this package follows the syntax as specified by RFC 5322.
     9  Notable divergences:
    10  	* Obsolete address formats are not parsed, including addresses with
    11  	  embedded route information.
    12  	* Group addresses are not parsed.
    13  	* The full range of spacing (the CFWS syntax element) is not supported,
    14  	  such as breaking addresses across lines.
    15  */
    16  package mail
    17  
    18  import (
    19  	"bufio"
    20  	"bytes"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"log"
    25  	"mime"
    26  	"net/textproto"
    27  	"strings"
    28  	"time"
    29  )
    30  
    31  var debug = debugT(false)
    32  
    33  type debugT bool
    34  
    35  func (d debugT) Printf(format string, args ...interface{}) {
    36  	if d {
    37  		log.Printf(format, args...)
    38  	}
    39  }
    40  
    41  // A Message represents a parsed mail message.
    42  type Message struct {
    43  	Header Header
    44  	Body   io.Reader
    45  }
    46  
    47  // ReadMessage reads a message from r.
    48  // The headers are parsed, and the body of the message will be available
    49  // for reading from r.
    50  func ReadMessage(r io.Reader) (msg *Message, err error) {
    51  	tp := textproto.NewReader(bufio.NewReader(r))
    52  
    53  	hdr, err := tp.ReadMIMEHeader()
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  
    58  	return &Message{
    59  		Header: Header(hdr),
    60  		Body:   tp.R,
    61  	}, nil
    62  }
    63  
    64  // Layouts suitable for passing to time.Parse.
    65  // These are tried in order.
    66  var dateLayouts []string
    67  
    68  func init() {
    69  	// Generate layouts based on RFC 5322, section 3.3.
    70  
    71  	dows := [...]string{"", "Mon, "}   // day-of-week
    72  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
    73  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
    74  	seconds := [...]string{":05", ""}  // second
    75  	// "-0700 (MST)" is not in RFC 5322, but is common.
    76  	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
    77  
    78  	for _, dow := range dows {
    79  		for _, day := range days {
    80  			for _, year := range years {
    81  				for _, second := range seconds {
    82  					for _, zone := range zones {
    83  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    84  						dateLayouts = append(dateLayouts, s)
    85  					}
    86  				}
    87  			}
    88  		}
    89  	}
    90  }
    91  
    92  func parseDate(date string) (time.Time, error) {
    93  	for _, layout := range dateLayouts {
    94  		t, err := time.Parse(layout, date)
    95  		if err == nil {
    96  			return t, nil
    97  		}
    98  	}
    99  	return time.Time{}, errors.New("mail: header could not be parsed")
   100  }
   101  
   102  // A Header represents the key-value pairs in a mail message header.
   103  type Header map[string][]string
   104  
   105  // Get gets the first value associated with the given key.
   106  // If there are no values associated with the key, Get returns "".
   107  func (h Header) Get(key string) string {
   108  	return textproto.MIMEHeader(h).Get(key)
   109  }
   110  
   111  var ErrHeaderNotPresent = errors.New("mail: header not in message")
   112  
   113  // Date parses the Date header field.
   114  func (h Header) Date() (time.Time, error) {
   115  	hdr := h.Get("Date")
   116  	if hdr == "" {
   117  		return time.Time{}, ErrHeaderNotPresent
   118  	}
   119  	return parseDate(hdr)
   120  }
   121  
   122  // AddressList parses the named header field as a list of addresses.
   123  func (h Header) AddressList(key string) ([]*Address, error) {
   124  	hdr := h.Get(key)
   125  	if hdr == "" {
   126  		return nil, ErrHeaderNotPresent
   127  	}
   128  	return ParseAddressList(hdr)
   129  }
   130  
   131  // Address represents a single mail address.
   132  // An address such as "Barry Gibbs <bg@example.com>" is represented
   133  // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
   134  type Address struct {
   135  	Name    string // Proper name; may be empty.
   136  	Address string // user@domain
   137  }
   138  
   139  // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   140  func ParseAddress(address string) (*Address, error) {
   141  	return (&addrParser{s: address}).parseSingleAddress()
   142  }
   143  
   144  // ParseAddressList parses the given string as a list of addresses.
   145  func ParseAddressList(list string) ([]*Address, error) {
   146  	return (&addrParser{s: list}).parseAddressList()
   147  }
   148  
   149  // An AddressParser is an RFC 5322 address parser.
   150  type AddressParser struct {
   151  	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
   152  	WordDecoder *mime.WordDecoder
   153  }
   154  
   155  // Parse parses a single RFC 5322 address of the
   156  // form "Gogh Fir <gf@example.com>" or "foo@example.com".
   157  func (p *AddressParser) Parse(address string) (*Address, error) {
   158  	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
   159  }
   160  
   161  // ParseList parses the given string as a list of comma-separated addresses
   162  // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
   163  func (p *AddressParser) ParseList(list string) ([]*Address, error) {
   164  	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
   165  }
   166  
   167  // String formats the address as a valid RFC 5322 address.
   168  // If the address's name contains non-ASCII characters
   169  // the name will be rendered according to RFC 2047.
   170  func (a *Address) String() string {
   171  	// Format address local@domain
   172  	at := strings.LastIndex(a.Address, "@")
   173  	var local, domain string
   174  	if at < 0 {
   175  		// This is a malformed address ("@" is required in addr-spec);
   176  		// treat the whole address as local-part.
   177  		local = a.Address
   178  	} else {
   179  		local, domain = a.Address[:at], a.Address[at+1:]
   180  	}
   181  
   182  	// Add quotes if needed
   183  	// TODO: rendering quoted local part and rendering printable name
   184  	//       should be merged in helper function.
   185  	quoteLocal := false
   186  	for i := 0; i < len(local); i++ {
   187  		ch := local[i]
   188  		if isAtext(ch, false) {
   189  			continue
   190  		}
   191  		if ch == '.' {
   192  			// Dots are okay if they are surrounded by atext.
   193  			// We only need to check that the previous byte is
   194  			// not a dot, and this isn't the end of the string.
   195  			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
   196  				continue
   197  			}
   198  		}
   199  		quoteLocal = true
   200  		break
   201  	}
   202  	if quoteLocal {
   203  		local = quoteString(local)
   204  
   205  	}
   206  
   207  	s := "<" + local + "@" + domain + ">"
   208  
   209  	if a.Name == "" {
   210  		return s
   211  	}
   212  
   213  	// If every character is printable ASCII, quoting is simple.
   214  	allPrintable := true
   215  	for i := 0; i < len(a.Name); i++ {
   216  		// isWSP here should actually be isFWS,
   217  		// but we don't support folding yet.
   218  		if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) {
   219  			allPrintable = false
   220  			break
   221  		}
   222  	}
   223  	if allPrintable {
   224  		b := bytes.NewBufferString(`"`)
   225  		for i := 0; i < len(a.Name); i++ {
   226  			if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) {
   227  				b.WriteByte('\\')
   228  			}
   229  			b.WriteByte(a.Name[i])
   230  		}
   231  		b.WriteString(`" `)
   232  		b.WriteString(s)
   233  		return b.String()
   234  	}
   235  
   236  	// Text in an encoded-word in a display-name must not contain certain
   237  	// characters like quotes or parentheses (see RFC 2047 section 5.3).
   238  	// When this is the case encode the name using base64 encoding.
   239  	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
   240  		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
   241  	}
   242  	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
   243  }
   244  
   245  type addrParser struct {
   246  	s   string
   247  	dec *mime.WordDecoder // may be nil
   248  }
   249  
   250  func (p *addrParser) parseAddressList() ([]*Address, error) {
   251  	var list []*Address
   252  	for {
   253  		p.skipSpace()
   254  		addr, err := p.parseAddress()
   255  		if err != nil {
   256  			return nil, err
   257  		}
   258  		list = append(list, addr)
   259  
   260  		p.skipSpace()
   261  		if p.empty() {
   262  			break
   263  		}
   264  		if !p.consume(',') {
   265  			return nil, errors.New("mail: expected comma")
   266  		}
   267  	}
   268  	return list, nil
   269  }
   270  
   271  func (p *addrParser) parseSingleAddress() (*Address, error) {
   272  	addr, err := p.parseAddress()
   273  	if err != nil {
   274  		return nil, err
   275  	}
   276  	p.skipSpace()
   277  	if !p.empty() {
   278  		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
   279  	}
   280  	return addr, nil
   281  }
   282  
   283  // parseAddress parses a single RFC 5322 address at the start of p.
   284  func (p *addrParser) parseAddress() (addr *Address, err error) {
   285  	debug.Printf("parseAddress: %q", p.s)
   286  	p.skipSpace()
   287  	if p.empty() {
   288  		return nil, errors.New("mail: no address")
   289  	}
   290  
   291  	// address = name-addr / addr-spec
   292  	// TODO(dsymonds): Support parsing group address.
   293  
   294  	// addr-spec has a more restricted grammar than name-addr,
   295  	// so try parsing it first, and fallback to name-addr.
   296  	// TODO(dsymonds): Is this really correct?
   297  	spec, err := p.consumeAddrSpec()
   298  	if err == nil {
   299  		return &Address{
   300  			Address: spec,
   301  		}, err
   302  	}
   303  	debug.Printf("parseAddress: not an addr-spec: %v", err)
   304  	debug.Printf("parseAddress: state is now %q", p.s)
   305  
   306  	// display-name
   307  	var displayName string
   308  	if p.peek() != '<' {
   309  		displayName, err = p.consumePhrase()
   310  		if err != nil {
   311  			return nil, err
   312  		}
   313  	}
   314  	debug.Printf("parseAddress: displayName=%q", displayName)
   315  
   316  	// angle-addr = "<" addr-spec ">"
   317  	p.skipSpace()
   318  	if !p.consume('<') {
   319  		return nil, errors.New("mail: no angle-addr")
   320  	}
   321  	spec, err = p.consumeAddrSpec()
   322  	if err != nil {
   323  		return nil, err
   324  	}
   325  	if !p.consume('>') {
   326  		return nil, errors.New("mail: unclosed angle-addr")
   327  	}
   328  	debug.Printf("parseAddress: spec=%q", spec)
   329  
   330  	return &Address{
   331  		Name:    displayName,
   332  		Address: spec,
   333  	}, nil
   334  }
   335  
   336  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   337  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   338  	debug.Printf("consumeAddrSpec: %q", p.s)
   339  
   340  	orig := *p
   341  	defer func() {
   342  		if err != nil {
   343  			*p = orig
   344  		}
   345  	}()
   346  
   347  	// local-part = dot-atom / quoted-string
   348  	var localPart string
   349  	p.skipSpace()
   350  	if p.empty() {
   351  		return "", errors.New("mail: no addr-spec")
   352  	}
   353  	if p.peek() == '"' {
   354  		// quoted-string
   355  		debug.Printf("consumeAddrSpec: parsing quoted-string")
   356  		localPart, err = p.consumeQuotedString()
   357  	} else {
   358  		// dot-atom
   359  		debug.Printf("consumeAddrSpec: parsing dot-atom")
   360  		localPart, err = p.consumeAtom(true, false)
   361  	}
   362  	if err != nil {
   363  		debug.Printf("consumeAddrSpec: failed: %v", err)
   364  		return "", err
   365  	}
   366  
   367  	if !p.consume('@') {
   368  		return "", errors.New("mail: missing @ in addr-spec")
   369  	}
   370  
   371  	// domain = dot-atom / domain-literal
   372  	var domain string
   373  	p.skipSpace()
   374  	if p.empty() {
   375  		return "", errors.New("mail: no domain in addr-spec")
   376  	}
   377  	// TODO(dsymonds): Handle domain-literal
   378  	domain, err = p.consumeAtom(true, false)
   379  	if err != nil {
   380  		return "", err
   381  	}
   382  
   383  	return localPart + "@" + domain, nil
   384  }
   385  
   386  // consumePhrase parses the RFC 5322 phrase at the start of p.
   387  func (p *addrParser) consumePhrase() (phrase string, err error) {
   388  	debug.Printf("consumePhrase: [%s]", p.s)
   389  	// phrase = 1*word
   390  	var words []string
   391  	for {
   392  		// word = atom / quoted-string
   393  		var word string
   394  		p.skipSpace()
   395  		if p.empty() {
   396  			return "", errors.New("mail: missing phrase")
   397  		}
   398  		if p.peek() == '"' {
   399  			// quoted-string
   400  			word, err = p.consumeQuotedString()
   401  		} else {
   402  			// atom
   403  			// We actually parse dot-atom here to be more permissive
   404  			// than what RFC 5322 specifies.
   405  			word, err = p.consumeAtom(true, true)
   406  			if err == nil {
   407  				word, err = p.decodeRFC2047Word(word)
   408  			}
   409  		}
   410  
   411  		if err != nil {
   412  			break
   413  		}
   414  		debug.Printf("consumePhrase: consumed %q", word)
   415  		words = append(words, word)
   416  	}
   417  	// Ignore any error if we got at least one word.
   418  	if err != nil && len(words) == 0 {
   419  		debug.Printf("consumePhrase: hit err: %v", err)
   420  		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
   421  	}
   422  	phrase = strings.Join(words, " ")
   423  	return phrase, nil
   424  }
   425  
   426  // consumeQuotedString parses the quoted string at the start of p.
   427  func (p *addrParser) consumeQuotedString() (qs string, err error) {
   428  	// Assume first byte is '"'.
   429  	i := 1
   430  	qsb := make([]byte, 0, 10)
   431  Loop:
   432  	for {
   433  		if i >= p.len() {
   434  			return "", errors.New("mail: unclosed quoted-string")
   435  		}
   436  		switch c := p.s[i]; {
   437  		case c == '"':
   438  			break Loop
   439  		case c == '\\':
   440  			if i+1 == p.len() {
   441  				return "", errors.New("mail: unclosed quoted-string")
   442  			}
   443  			qsb = append(qsb, p.s[i+1])
   444  			i += 2
   445  		case isQtext(c), c == ' ':
   446  			// qtext (printable US-ASCII excluding " and \), or
   447  			// FWS (almost; we're ignoring CRLF)
   448  			qsb = append(qsb, c)
   449  			i++
   450  		default:
   451  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
   452  		}
   453  	}
   454  	p.s = p.s[i+1:]
   455  	if len(qsb) == 0 {
   456  		return "", errors.New("mail: empty quoted-string")
   457  	}
   458  	return string(qsb), nil
   459  }
   460  
   461  var errNonASCII = errors.New("mail: unencoded non-ASCII text in address")
   462  
   463  // consumeAtom parses an RFC 5322 atom at the start of p.
   464  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   465  // If permissive is true, consumeAtom will not fail on
   466  // leading/trailing/double dots in the atom (see golang.org/issue/4938).
   467  func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
   468  	if c := p.peek(); !isAtext(c, false) {
   469  		if c > 127 {
   470  			return "", errNonASCII
   471  		}
   472  		return "", errors.New("mail: invalid string")
   473  	}
   474  	i := 1
   475  	for ; i < p.len() && isAtext(p.s[i], dot); i++ {
   476  	}
   477  	if i < p.len() && p.s[i] > 127 {
   478  		return "", errNonASCII
   479  	}
   480  	atom, p.s = string(p.s[:i]), p.s[i:]
   481  	if !permissive {
   482  		if strings.HasPrefix(atom, ".") {
   483  			return "", errors.New("mail: leading dot in atom")
   484  		}
   485  		if strings.Contains(atom, "..") {
   486  			return "", errors.New("mail: double dot in atom")
   487  		}
   488  		if strings.HasSuffix(atom, ".") {
   489  			return "", errors.New("mail: trailing dot in atom")
   490  		}
   491  	}
   492  	return atom, nil
   493  }
   494  
   495  func (p *addrParser) consume(c byte) bool {
   496  	if p.empty() || p.peek() != c {
   497  		return false
   498  	}
   499  	p.s = p.s[1:]
   500  	return true
   501  }
   502  
   503  // skipSpace skips the leading space and tab characters.
   504  func (p *addrParser) skipSpace() {
   505  	p.s = strings.TrimLeft(p.s, " \t")
   506  }
   507  
   508  func (p *addrParser) peek() byte {
   509  	return p.s[0]
   510  }
   511  
   512  func (p *addrParser) empty() bool {
   513  	return p.len() == 0
   514  }
   515  
   516  func (p *addrParser) len() int {
   517  	return len(p.s)
   518  }
   519  
   520  func (p *addrParser) decodeRFC2047Word(s string) (string, error) {
   521  	if p.dec != nil {
   522  		return p.dec.DecodeHeader(s)
   523  	}
   524  
   525  	dec, err := rfc2047Decoder.Decode(s)
   526  	if err == nil {
   527  		return dec, nil
   528  	}
   529  
   530  	if _, ok := err.(charsetError); ok {
   531  		return s, err
   532  	}
   533  
   534  	// Ignore invalid RFC 2047 encoded-word errors.
   535  	return s, nil
   536  }
   537  
   538  var rfc2047Decoder = mime.WordDecoder{
   539  	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
   540  		return nil, charsetError(charset)
   541  	},
   542  }
   543  
   544  type charsetError string
   545  
   546  func (e charsetError) Error() string {
   547  	return fmt.Sprintf("charset not supported: %q", string(e))
   548  }
   549  
   550  var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
   551  	"abcdefghijklmnopqrstuvwxyz" +
   552  	"0123456789" +
   553  	"!#$%&'*+-/=?^_`{|}~")
   554  
   555  // isAtext reports whether c is an RFC 5322 atext character.
   556  // If dot is true, period is included.
   557  func isAtext(c byte, dot bool) bool {
   558  	if dot && c == '.' {
   559  		return true
   560  	}
   561  	return bytes.IndexByte(atextChars, c) >= 0
   562  }
   563  
   564  // isQtext reports whether c is an RFC 5322 qtext character.
   565  func isQtext(c byte) bool {
   566  	// Printable US-ASCII, excluding backslash or quote.
   567  	if c == '\\' || c == '"' {
   568  		return false
   569  	}
   570  	return '!' <= c && c <= '~'
   571  }
   572  
   573  // quoteString renders a string as a RFC5322 quoted-string.
   574  func quoteString(s string) string {
   575  	var buf bytes.Buffer
   576  	buf.WriteByte('"')
   577  	for _, c := range s {
   578  		ch := byte(c)
   579  		if isQtext(ch) || isWSP(ch) {
   580  			buf.WriteByte(ch)
   581  		} else if isVchar(ch) {
   582  			buf.WriteByte('\\')
   583  			buf.WriteByte(ch)
   584  		}
   585  	}
   586  	buf.WriteByte('"')
   587  	return buf.String()
   588  }
   589  
   590  // isVchar reports whether c is an RFC 5322 VCHAR character.
   591  func isVchar(c byte) bool {
   592  	// Visible (printing) characters.
   593  	return '!' <= c && c <= '~'
   594  }
   595  
   596  // isWSP reports whether c is a WSP (white space).
   597  // WSP is a space or horizontal tab (RFC5234 Appendix B).
   598  func isWSP(c byte) bool {
   599  	return c == ' ' || c == '\t'
   600  }