github.com/zxy12/golang151_with_comment@v0.0.0-20190507085033-721809559d3c/mime/encodedword.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/base64"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"strings"
    14  	"sync"
    15  	"unicode"
    16  	"unicode/utf8"
    17  )
    18  
// A WordEncoder is an RFC 2047 encoded-word encoder.
// Its underlying byte value is the encoding letter that is written into the
// header of every encoded-word it produces.
type WordEncoder byte

const (
	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
	BEncoding = WordEncoder('b')
	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
	QEncoding = WordEncoder('q')
)
    28  
var (
	// errInvalidWord reports input that is not a well-formed encoded-word:
	// wrong overall shape, an encoding letter other than B or Q, or a
	// malformed Q-encoded payload.
	errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
)
    32  
    33  // Encode returns the encoded-word form of s. If s is ASCII without special
    34  // characters, it is returned unchanged. The provided charset is the IANA
    35  // charset name of s. It is case insensitive.
    36  func (e WordEncoder) Encode(charset, s string) string {
    37  	if !needsEncoding(s) {
    38  		return s
    39  	}
    40  	return e.encodeWord(charset, s)
    41  }
    42  
    43  func needsEncoding(s string) bool {
    44  	for _, b := range s {
    45  		if (b < ' ' || b > '~') && b != '\t' {
    46  			return true
    47  		}
    48  	}
    49  	return false
    50  }
    51  
    52  // encodeWord encodes a string into an encoded-word.
    53  func (e WordEncoder) encodeWord(charset, s string) string {
    54  	buf := getBuffer()
    55  	defer putBuffer(buf)
    56  
    57  	buf.WriteString("=?")
    58  	buf.WriteString(charset)
    59  	buf.WriteByte('?')
    60  	buf.WriteByte(byte(e))
    61  	buf.WriteByte('?')
    62  
    63  	if e == BEncoding {
    64  		w := base64.NewEncoder(base64.StdEncoding, buf)
    65  		io.WriteString(w, s)
    66  		w.Close()
    67  	} else {
    68  		enc := make([]byte, 3)
    69  		for i := 0; i < len(s); i++ {
    70  			b := s[i]
    71  			switch {
    72  			case b == ' ':
    73  				buf.WriteByte('_')
    74  			case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
    75  				buf.WriteByte(b)
    76  			default:
    77  				enc[0] = '='
    78  				enc[1] = upperhex[b>>4]
    79  				enc[2] = upperhex[b&0x0f]
    80  				buf.Write(enc)
    81  			}
    82  		}
    83  	}
    84  	buf.WriteString("?=")
    85  	return buf.String()
    86  }
    87  
// upperhex maps a 4-bit value (0-15) to its upper-case hexadecimal digit.
const upperhex = "0123456789ABCDEF"
    89  
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
	// are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
   100  
   101  // Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word,
   102  // word is returned unchanged.
   103  func (d *WordDecoder) Decode(word string) (string, error) {
   104  	fields := strings.Split(word, "?") // TODO: remove allocation?
   105  	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
   106  		return "", errInvalidWord
   107  	}
   108  
   109  	content, err := decode(fields[2][0], fields[3])
   110  	if err != nil {
   111  		return "", err
   112  	}
   113  
   114  	buf := getBuffer()
   115  	defer putBuffer(buf)
   116  
   117  	if err := d.convert(buf, fields[1], content); err != nil {
   118  		return "", err
   119  	}
   120  
   121  	return buf.String(), nil
   122  }
   123  
// DecodeHeader decodes all encoded-words of the given string. Text that is
// not part of a decodable encoded-word is copied to the result unchanged,
// except that white space separating two adjacent encoded-words is dropped.
//
// It returns an error when converting a decoded word to UTF-8 fails: the
// charset is not handled by default and d.CharsetReader is nil, CharsetReader
// returns an error, or reading from the reader it returned fails.
func (d *WordDecoder) DecodeHeader(header string) (string, error) {
	// If there is no encoded-word, returns before creating a buffer.
	i := strings.Index(header, "=?")
	if i == -1 {
		return header, nil
	}

	buf := getBuffer()
	defer putBuffer(buf)

	// Copy everything before the first candidate word verbatim.
	buf.WriteString(header[:i])
	header = header[i:]

	// betweenWords is true when the previous token written was a decoded
	// encoded-word, so that pure white space before the next one is dropped.
	betweenWords := false
	for {
		start := strings.Index(header, "=?")
		if start == -1 {
			break
		}
		cur := start + len("=?")

		// The charset runs up to the next '?'.
		i := strings.Index(header[cur:], "?")
		if i == -1 {
			break
		}
		charset := header[cur : cur+i]
		cur += i + len("?")

		// At least an encoding letter, a '?' and the closing "?=" must remain.
		if len(header) < cur+len("Q??=") {
			break
		}
		encoding := header[cur]
		cur++

		if header[cur] != '?' {
			break
		}
		cur++

		// The encoded text runs up to the closing "?=".
		j := strings.Index(header[cur:], "?=")
		if j == -1 {
			break
		}
		text := header[cur : cur+j]
		end := cur + j + len("?=")

		content, err := decode(encoding, text)
		if err != nil {
			// Not a decodable word: emit the text up to and including this
			// "=?" as-is and resume scanning right after it.
			betweenWords = false
			buf.WriteString(header[:start+2])
			header = header[start+2:]
			continue
		}

		// Write characters before the encoded-word. White-space and newline
		// characters separating two encoded-words must be deleted.
		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
			buf.WriteString(header[:start])
		}

		if err := d.convert(buf, charset, content); err != nil {
			return "", err
		}

		header = header[end:]
		betweenWords = true
	}

	// Whatever is left contains no further complete encoded-word.
	if len(header) > 0 {
		buf.WriteString(header)
	}

	return buf.String(), nil
}
   200  
   201  func decode(encoding byte, text string) ([]byte, error) {
   202  	switch encoding {
   203  	case 'B', 'b':
   204  		return base64.StdEncoding.DecodeString(text)
   205  	case 'Q', 'q':
   206  		return qDecode(text)
   207  	default:
   208  		return nil, errInvalidWord
   209  	}
   210  }
   211  
   212  func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
   213  	switch {
   214  	case strings.EqualFold("utf-8", charset):
   215  		buf.Write(content)
   216  	case strings.EqualFold("iso-8859-1", charset):
   217  		for _, c := range content {
   218  			buf.WriteRune(rune(c))
   219  		}
   220  	case strings.EqualFold("us-ascii", charset):
   221  		for _, c := range content {
   222  			if c >= utf8.RuneSelf {
   223  				buf.WriteRune(unicode.ReplacementChar)
   224  			} else {
   225  				buf.WriteByte(c)
   226  			}
   227  		}
   228  	default:
   229  		if d.CharsetReader == nil {
   230  			return fmt.Errorf("mime: unhandled charset %q", charset)
   231  		}
   232  		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
   233  		if err != nil {
   234  			return err
   235  		}
   236  		if _, err = buf.ReadFrom(r); err != nil {
   237  			return err
   238  		}
   239  	}
   240  	return nil
   241  }
   242  
   243  // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
   244  // one byte of non-whitespace.
   245  func hasNonWhitespace(s string) bool {
   246  	for _, b := range s {
   247  		switch b {
   248  		// Encoded-words can only be separated by linear white spaces which does
   249  		// not include vertical tabs (\v).
   250  		case ' ', '\t', '\n', '\r':
   251  		default:
   252  			return true
   253  		}
   254  	}
   255  	return false
   256  }
   257  
   258  // qDecode decodes a Q encoded string.
   259  func qDecode(s string) ([]byte, error) {
   260  	dec := make([]byte, len(s))
   261  	n := 0
   262  	for i := 0; i < len(s); i++ {
   263  		switch c := s[i]; {
   264  		case c == '_':
   265  			dec[n] = ' '
   266  		case c == '=':
   267  			if i+2 >= len(s) {
   268  				return nil, errInvalidWord
   269  			}
   270  			b, err := readHexByte(s[i+1], s[i+2])
   271  			if err != nil {
   272  				return nil, err
   273  			}
   274  			dec[n] = b
   275  			i += 2
   276  		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
   277  			dec[n] = c
   278  		default:
   279  			return nil, errInvalidWord
   280  		}
   281  		n++
   282  	}
   283  
   284  	return dec[:n], nil
   285  }
   286  
   287  // readHexByte returns the byte from its quoted-printable representation.
   288  func readHexByte(a, b byte) (byte, error) {
   289  	var hb, lb byte
   290  	var err error
   291  	if hb, err = fromHex(a); err != nil {
   292  		return 0, err
   293  	}
   294  	if lb, err = fromHex(b); err != nil {
   295  		return 0, err
   296  	}
   297  	return hb<<4 | lb, nil
   298  }
   299  
   300  func fromHex(b byte) (byte, error) {
   301  	switch {
   302  	case b >= '0' && b <= '9':
   303  		return b - '0', nil
   304  	case b >= 'A' && b <= 'F':
   305  		return b - 'A' + 10, nil
   306  	// Accept badly encoded bytes.
   307  	case b >= 'a' && b <= 'f':
   308  		return b - 'a' + 10, nil
   309  	}
   310  	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
   311  }
   312  
   313  var bufPool = sync.Pool{
   314  	New: func() interface{} {
   315  		return new(bytes.Buffer)
   316  	},
   317  }
   318  
   319  func getBuffer() *bytes.Buffer {
   320  	return bufPool.Get().(*bytes.Buffer)
   321  }
   322  
   323  func putBuffer(buf *bytes.Buffer) {
   324  	if buf.Len() > 1024 {
   325  		return
   326  	}
   327  	buf.Reset()
   328  	bufPool.Put(buf)
   329  }