github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/mime/encodedword.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/base64"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"strings"
    14  	"sync"
    15  	"unicode"
    16  	"unicode/utf8"
    17  )
    18  
// A WordEncoder is an RFC 2047 encoded-word encoder. Its underlying byte is
// the encoding letter that is written into the header of every encoded-word
// it produces.
type WordEncoder byte
    21  
const (
	// BEncoding represents the Base64 encoding scheme as defined by RFC 2045.
	BEncoding = WordEncoder('b')
	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
	QEncoding = WordEncoder('q')
)
    28  
var (
	// errInvalidWord is returned when the input is not a well-formed
	// RFC 2047 encoded-word, names an unknown encoding, or contains a byte
	// that is not allowed in Q-encoded text.
	errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
)
    32  
    33  // Encode returns the encoded-word form of s. If s is ASCII without special
    34  // characters, it is returned unchanged. The provided charset is the IANA
    35  // charset name of s. It is case insensitive.
    36  func (e WordEncoder) Encode(charset, s string) string {
    37  	if !needsEncoding(s) {
    38  		return s
    39  	}
    40  	return e.encodeWord(charset, s)
    41  }
    42  
    43  func needsEncoding(s string) bool {
    44  	for _, b := range s {
    45  		if (b < ' ' || b > '~') && b != '\t' {
    46  			return true
    47  		}
    48  	}
    49  	return false
    50  }
    51  
    52  // encodeWord encodes a string into an encoded-word.
    53  func (e WordEncoder) encodeWord(charset, s string) string {
    54  	buf := getBuffer()
    55  	defer putBuffer(buf)
    56  
    57  	buf.WriteString("=?")
    58  	buf.WriteString(charset)
    59  	buf.WriteByte('?')
    60  	buf.WriteByte(byte(e))
    61  	buf.WriteByte('?')
    62  
    63  	if e == BEncoding {
    64  		w := base64.NewEncoder(base64.StdEncoding, buf)
    65  		io.WriteString(w, s)
    66  		w.Close()
    67  	} else {
    68  		enc := make([]byte, 3)
    69  		for i := 0; i < len(s); i++ {
    70  			b := s[i]
    71  			switch {
    72  			case b == ' ':
    73  				buf.WriteByte('_')
    74  			case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
    75  				buf.WriteByte(b)
    76  			default:
    77  				enc[0] = '='
    78  				enc[1] = upperhex[b>>4]
    79  				enc[2] = upperhex[b&0x0f]
    80  				buf.Write(enc)
    81  			}
    82  		}
    83  	}
    84  	buf.WriteString("?=")
    85  	return buf.String()
    86  }
    87  
// upperhex maps a 4-bit value (used as an index) to its upper-case
// hexadecimal digit.
const upperhex = "0123456789ABCDEF"
    89  
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always passed in lower-case. The utf-8, iso-8859-1 and
	// us-ascii charsets are handled by default and never reach
	// CharsetReader.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
   100  
   101  // Decode decodes an RFC 2047 encoded-word.
   102  func (d *WordDecoder) Decode(word string) (string, error) {
   103  	fields := strings.Split(word, "?") // TODO: remove allocation?
   104  	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
   105  		return "", errInvalidWord
   106  	}
   107  
   108  	content, err := decode(fields[2][0], fields[3])
   109  	if err != nil {
   110  		return "", err
   111  	}
   112  
   113  	buf := getBuffer()
   114  	defer putBuffer(buf)
   115  
   116  	if err := d.convert(buf, fields[1], content); err != nil {
   117  		return "", err
   118  	}
   119  
   120  	return buf.String(), nil
   121  }
   122  
// DecodeHeader decodes all encoded-words of the given string and returns the
// result, with the rest of header copied through. Sequences that start like
// an encoded-word but are incomplete or cannot be decoded are left as-is.
// White space that separates two successfully decoded encoded-words is
// removed.
//
// It returns an error only when charset conversion fails: either the charset
// is not one handled by default and d.CharsetReader is nil, or
// d.CharsetReader (or reading from the reader it returns) reports an error.
func (d *WordDecoder) DecodeHeader(header string) (string, error) {
	// If there is no encoded-word, returns before creating a buffer.
	i := strings.Index(header, "=?")
	if i == -1 {
		return header, nil
	}

	buf := getBuffer()
	defer putBuffer(buf)

	// Copy everything preceding the first "=?" verbatim.
	buf.WriteString(header[:i])
	header = header[i:]

	// betweenWords is true when the previous iteration consumed a valid
	// encoded-word, so that pure white space before the next one is dropped.
	betweenWords := false
	for {
		// Each iteration tries to parse "=?charset?encoding?text?=" from the
		// not yet consumed remainder of header. Any structural mismatch
		// breaks out of the loop and the remainder is emitted unchanged.
		start := strings.Index(header, "=?")
		if start == -1 {
			break
		}
		cur := start + len("=?")

		i := strings.Index(header[cur:], "?")
		if i == -1 {
			break
		}
		charset := header[cur : cur+i]
		cur += i + len("?")

		// There must be room left for at least the encoding byte, the '?'
		// following it and the closing "?=".
		if len(header) < cur+len("Q??=") {
			break
		}
		encoding := header[cur]
		cur++

		if header[cur] != '?' {
			break
		}
		cur++

		j := strings.Index(header[cur:], "?=")
		if j == -1 {
			break
		}
		text := header[cur : cur+j]
		end := cur + j + len("?=")

		content, err := decode(encoding, text)
		if err != nil {
			// Not a decodable encoded-word: emit everything up to and
			// including this "=?" literally and resume scanning after it.
			betweenWords = false
			buf.WriteString(header[:start+2])
			header = header[start+2:]
			continue
		}

		// Write characters before the encoded-word. White-space and newline
		// characters separating two encoded-words must be deleted.
		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
			buf.WriteString(header[:start])
		}

		if err := d.convert(buf, charset, content); err != nil {
			return "", err
		}

		header = header[end:]
		betweenWords = true
	}

	// Emit whatever follows the last decoded encoded-word.
	if len(header) > 0 {
		buf.WriteString(header)
	}

	return buf.String(), nil
}
   199  
   200  func decode(encoding byte, text string) ([]byte, error) {
   201  	switch encoding {
   202  	case 'B', 'b':
   203  		return base64.StdEncoding.DecodeString(text)
   204  	case 'Q', 'q':
   205  		return qDecode(text)
   206  	default:
   207  		return nil, errInvalidWord
   208  	}
   209  }
   210  
   211  func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
   212  	switch {
   213  	case strings.EqualFold("utf-8", charset):
   214  		buf.Write(content)
   215  	case strings.EqualFold("iso-8859-1", charset):
   216  		for _, c := range content {
   217  			buf.WriteRune(rune(c))
   218  		}
   219  	case strings.EqualFold("us-ascii", charset):
   220  		for _, c := range content {
   221  			if c >= utf8.RuneSelf {
   222  				buf.WriteRune(unicode.ReplacementChar)
   223  			} else {
   224  				buf.WriteByte(c)
   225  			}
   226  		}
   227  	default:
   228  		if d.CharsetReader == nil {
   229  			return fmt.Errorf("mime: unhandled charset %q", charset)
   230  		}
   231  		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
   232  		if err != nil {
   233  			return err
   234  		}
   235  		if _, err = buf.ReadFrom(r); err != nil {
   236  			return err
   237  		}
   238  	}
   239  	return nil
   240  }
   241  
   242  // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
   243  // one byte of non-whitespace.
   244  func hasNonWhitespace(s string) bool {
   245  	for _, b := range s {
   246  		switch b {
   247  		// Encoded-words can only be separated by linear white spaces which does
   248  		// not include vertical tabs (\v).
   249  		case ' ', '\t', '\n', '\r':
   250  		default:
   251  			return true
   252  		}
   253  	}
   254  	return false
   255  }
   256  
   257  // qDecode decodes a Q encoded string.
   258  func qDecode(s string) ([]byte, error) {
   259  	dec := make([]byte, len(s))
   260  	n := 0
   261  	for i := 0; i < len(s); i++ {
   262  		switch c := s[i]; {
   263  		case c == '_':
   264  			dec[n] = ' '
   265  		case c == '=':
   266  			if i+2 >= len(s) {
   267  				return nil, errInvalidWord
   268  			}
   269  			b, err := readHexByte(s[i+1], s[i+2])
   270  			if err != nil {
   271  				return nil, err
   272  			}
   273  			dec[n] = b
   274  			i += 2
   275  		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
   276  			dec[n] = c
   277  		default:
   278  			return nil, errInvalidWord
   279  		}
   280  		n++
   281  	}
   282  
   283  	return dec[:n], nil
   284  }
   285  
   286  // readHexByte returns the byte from its quoted-printable representation.
   287  func readHexByte(a, b byte) (byte, error) {
   288  	var hb, lb byte
   289  	var err error
   290  	if hb, err = fromHex(a); err != nil {
   291  		return 0, err
   292  	}
   293  	if lb, err = fromHex(b); err != nil {
   294  		return 0, err
   295  	}
   296  	return hb<<4 | lb, nil
   297  }
   298  
   299  func fromHex(b byte) (byte, error) {
   300  	switch {
   301  	case b >= '0' && b <= '9':
   302  		return b - '0', nil
   303  	case b >= 'A' && b <= 'F':
   304  		return b - 'A' + 10, nil
   305  	// Accept badly encoded bytes.
   306  	case b >= 'a' && b <= 'f':
   307  		return b - 'a' + 10, nil
   308  	}
   309  	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
   310  }
   311  
// bufPool holds reusable bytes.Buffer values; when the pool is empty it
// creates a fresh, empty buffer.
var bufPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}
   317  
   318  func getBuffer() *bytes.Buffer {
   319  	return bufPool.Get().(*bytes.Buffer)
   320  }
   321  
   322  func putBuffer(buf *bytes.Buffer) {
   323  	if buf.Len() > 1024 {
   324  		return
   325  	}
   326  	buf.Reset()
   327  	bufPool.Put(buf)
   328  }