github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/resource/api/internal/mime/encodedword.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //+build !go1.6
     6  
     7  // TODO(natefinch) remove this once we support building on go 1.6 for all platforms.
     8  // This code was copied from the Go 1.6 sourcecode.
     9  
    10  package mime
    11  
    12  import (
    13  	"bytes"
    14  	"encoding/base64"
    15  	"errors"
    16  	"fmt"
    17  	"io"
    18  	"strings"
    19  	"unicode"
    20  	"unicode/utf8"
    21  )
    22  
// A WordEncoder is an RFC 2047 encoded-word encoder.
//
// The underlying byte is the encoding letter that is written into the header
// of every encoded-word produced by the encoder.
type WordEncoder byte
    25  
    26  const (
    27  	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
    28  	BEncoding = WordEncoder('b')
    29  	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
    30  	QEncoding = WordEncoder('q')
    31  )
    32  
    33  var (
    34  	errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
    35  )
    36  
    37  // Encode returns the encoded-word form of s. If s is ASCII without special
    38  // characters, it is returned unchanged. The provided charset is the IANA
    39  // charset name of s. It is case insensitive.
    40  func (e WordEncoder) Encode(charset, s string) string {
    41  	if !needsEncoding(s) {
    42  		return s
    43  	}
    44  	return e.encodeWord(charset, s)
    45  }
    46  
    47  func needsEncoding(s string) bool {
    48  	for _, b := range s {
    49  		if (b < ' ' || b > '~') && b != '\t' {
    50  			return true
    51  		}
    52  	}
    53  	return false
    54  }
    55  
    56  // encodeWord encodes a string into an encoded-word.
    57  func (e WordEncoder) encodeWord(charset, s string) string {
    58  	buf := getBuffer()
    59  	defer putBuffer(buf)
    60  
    61  	e.openWord(buf, charset)
    62  	if e == BEncoding {
    63  		e.bEncode(buf, charset, s)
    64  	} else {
    65  		e.qEncode(buf, charset, s)
    66  	}
    67  	closeWord(buf)
    68  
    69  	return buf.String()
    70  }
    71  
    72  const (
    73  	// The maximum length of an encoded-word is 75 characters.
    74  	// See RFC 2047, section 2.
    75  	maxEncodedWordLen = 75
    76  	// maxContentLen is how much content can be encoded, ignoring the header and
    77  	// 2-byte footer.
    78  	maxContentLen = maxEncodedWordLen - len("=?UTF-8?") - len("?=")
    79  )
    80  
    81  var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
    82  
    83  // bEncode encodes s using base64 encoding and writes it to buf.
    84  func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
    85  	w := base64.NewEncoder(base64.StdEncoding, buf)
    86  	// If the charset is not UTF-8 or if the content is short, do not bother
    87  	// splitting the encoded-word.
    88  	if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
    89  		io.WriteString(w, s)
    90  		w.Close()
    91  		return
    92  	}
    93  
    94  	var currentLen, last, runeLen int
    95  	for i := 0; i < len(s); i += runeLen {
    96  		// Multi-byte characters must not be split across encoded-words.
    97  		// See RFC 2047, section 5.3.
    98  		_, runeLen = utf8.DecodeRuneInString(s[i:])
    99  
   100  		if currentLen+runeLen <= maxBase64Len {
   101  			currentLen += runeLen
   102  		} else {
   103  			io.WriteString(w, s[last:i])
   104  			w.Close()
   105  			e.splitWord(buf, charset)
   106  			last = i
   107  			currentLen = runeLen
   108  		}
   109  	}
   110  	io.WriteString(w, s[last:])
   111  	w.Close()
   112  }
   113  
   114  // qEncode encodes s using Q encoding and writes it to buf. It splits the
   115  // encoded-words when necessary.
   116  func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
   117  	// We only split encoded-words when the charset is UTF-8.
   118  	if !isUTF8(charset) {
   119  		writeQString(buf, s)
   120  		return
   121  	}
   122  
   123  	var currentLen, runeLen int
   124  	for i := 0; i < len(s); i += runeLen {
   125  		b := s[i]
   126  		// Multi-byte characters must not be split across encoded-words.
   127  		// See RFC 2047, section 5.3.
   128  		var encLen int
   129  		if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
   130  			runeLen, encLen = 1, 1
   131  		} else {
   132  			_, runeLen = utf8.DecodeRuneInString(s[i:])
   133  			encLen = 3 * runeLen
   134  		}
   135  
   136  		if currentLen+encLen > maxContentLen {
   137  			e.splitWord(buf, charset)
   138  			currentLen = 0
   139  		}
   140  		writeQString(buf, s[i:i+runeLen])
   141  		currentLen += encLen
   142  	}
   143  }
   144  
   145  // writeQString encodes s using Q encoding and writes it to buf.
   146  func writeQString(buf *bytes.Buffer, s string) {
   147  	for i := 0; i < len(s); i++ {
   148  		switch b := s[i]; {
   149  		case b == ' ':
   150  			buf.WriteByte('_')
   151  		case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
   152  			buf.WriteByte(b)
   153  		default:
   154  			buf.WriteByte('=')
   155  			buf.WriteByte(upperhex[b>>4])
   156  			buf.WriteByte(upperhex[b&0x0f])
   157  		}
   158  	}
   159  }
   160  
   161  // openWord writes the beginning of an encoded-word into buf.
   162  func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
   163  	buf.WriteString("=?")
   164  	buf.WriteString(charset)
   165  	buf.WriteByte('?')
   166  	buf.WriteByte(byte(e))
   167  	buf.WriteByte('?')
   168  }
   169  
   170  // closeWord writes the end of an encoded-word into buf.
   171  func closeWord(buf *bytes.Buffer) {
   172  	buf.WriteString("?=")
   173  }
   174  
   175  // splitWord closes the current encoded-word and opens a new one.
   176  func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
   177  	closeWord(buf)
   178  	buf.WriteByte(' ')
   179  	e.openWord(buf, charset)
   180  }
   181  
   182  func isUTF8(charset string) bool {
   183  	return strings.EqualFold(charset, "UTF-8")
   184  }
   185  
// upperhex maps a 4-bit value to its upper-case hexadecimal digit; it is
// indexed with each nibble of a byte when writing "=XX" escapes.
const upperhex = "0123456789ABCDEF"
   187  
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
	// are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
   198  
   199  // Decode decodes an RFC 2047 encoded-word.
   200  func (d *WordDecoder) Decode(word string) (string, error) {
   201  	if !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
   202  		return "", errInvalidWord
   203  	}
   204  	word = word[2 : len(word)-2]
   205  
   206  	// split delimits the first 2 fields
   207  	split := strings.IndexByte(word, '?')
   208  	// the field after split must only be one byte
   209  	if word[split+2] != '?' {
   210  		return "", errInvalidWord
   211  	}
   212  
   213  	// split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
   214  	charset := word[:split]
   215  	encoding := word[split+1]
   216  	text := word[split+3:]
   217  
   218  	content, err := decode(encoding, text)
   219  	if err != nil {
   220  		return "", err
   221  	}
   222  
   223  	buf := getBuffer()
   224  	defer putBuffer(buf)
   225  
   226  	if err := d.convert(buf, charset, content); err != nil {
   227  		return "", err
   228  	}
   229  
   230  	return buf.String(), nil
   231  }
   232  
   233  // DecodeHeader decodes all encoded-words of the given string. It returns an
   234  // error if and only if CharsetReader of d returns an error.
   235  func (d *WordDecoder) DecodeHeader(header string) (string, error) {
   236  	// If there is no encoded-word, returns before creating a buffer.
   237  	i := strings.Index(header, "=?")
   238  	if i == -1 {
   239  		return header, nil
   240  	}
   241  
   242  	buf := getBuffer()
   243  	defer putBuffer(buf)
   244  
   245  	buf.WriteString(header[:i])
   246  	header = header[i:]
   247  
   248  	betweenWords := false
   249  	for {
   250  		start := strings.Index(header, "=?")
   251  		if start == -1 {
   252  			break
   253  		}
   254  		cur := start + len("=?")
   255  
   256  		i := strings.Index(header[cur:], "?")
   257  		if i == -1 {
   258  			break
   259  		}
   260  		charset := header[cur : cur+i]
   261  		cur += i + len("?")
   262  
   263  		if len(header) < cur+len("Q??=") {
   264  			break
   265  		}
   266  		encoding := header[cur]
   267  		cur++
   268  
   269  		if header[cur] != '?' {
   270  			break
   271  		}
   272  		cur++
   273  
   274  		j := strings.Index(header[cur:], "?=")
   275  		if j == -1 {
   276  			break
   277  		}
   278  		text := header[cur : cur+j]
   279  		end := cur + j + len("?=")
   280  
   281  		content, err := decode(encoding, text)
   282  		if err != nil {
   283  			betweenWords = false
   284  			buf.WriteString(header[:start+2])
   285  			header = header[start+2:]
   286  			continue
   287  		}
   288  
   289  		// Write characters before the encoded-word. White-space and newline
   290  		// characters separating two encoded-words must be deleted.
   291  		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
   292  			buf.WriteString(header[:start])
   293  		}
   294  
   295  		if err := d.convert(buf, charset, content); err != nil {
   296  			return "", err
   297  		}
   298  
   299  		header = header[end:]
   300  		betweenWords = true
   301  	}
   302  
   303  	if len(header) > 0 {
   304  		buf.WriteString(header)
   305  	}
   306  
   307  	return buf.String(), nil
   308  }
   309  
   310  func decode(encoding byte, text string) ([]byte, error) {
   311  	switch encoding {
   312  	case 'B', 'b':
   313  		return base64.StdEncoding.DecodeString(text)
   314  	case 'Q', 'q':
   315  		return qDecode(text)
   316  	default:
   317  		return nil, errInvalidWord
   318  	}
   319  }
   320  
   321  func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
   322  	switch {
   323  	case strings.EqualFold("utf-8", charset):
   324  		buf.Write(content)
   325  	case strings.EqualFold("iso-8859-1", charset):
   326  		for _, c := range content {
   327  			buf.WriteRune(rune(c))
   328  		}
   329  	case strings.EqualFold("us-ascii", charset):
   330  		for _, c := range content {
   331  			if c >= utf8.RuneSelf {
   332  				buf.WriteRune(unicode.ReplacementChar)
   333  			} else {
   334  				buf.WriteByte(c)
   335  			}
   336  		}
   337  	default:
   338  		if d.CharsetReader == nil {
   339  			return fmt.Errorf("mime: unhandled charset %q", charset)
   340  		}
   341  		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
   342  		if err != nil {
   343  			return err
   344  		}
   345  		if _, err = buf.ReadFrom(r); err != nil {
   346  			return err
   347  		}
   348  	}
   349  	return nil
   350  }
   351  
   352  // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
   353  // one byte of non-whitespace.
   354  func hasNonWhitespace(s string) bool {
   355  	for _, b := range s {
   356  		switch b {
   357  		// Encoded-words can only be separated by linear white spaces which does
   358  		// not include vertical tabs (\v).
   359  		case ' ', '\t', '\n', '\r':
   360  		default:
   361  			return true
   362  		}
   363  	}
   364  	return false
   365  }
   366  
   367  // qDecode decodes a Q encoded string.
   368  func qDecode(s string) ([]byte, error) {
   369  	dec := make([]byte, len(s))
   370  	n := 0
   371  	for i := 0; i < len(s); i++ {
   372  		switch c := s[i]; {
   373  		case c == '_':
   374  			dec[n] = ' '
   375  		case c == '=':
   376  			if i+2 >= len(s) {
   377  				return nil, errInvalidWord
   378  			}
   379  			b, err := readHexByte(s[i+1], s[i+2])
   380  			if err != nil {
   381  				return nil, err
   382  			}
   383  			dec[n] = b
   384  			i += 2
   385  		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
   386  			dec[n] = c
   387  		default:
   388  			return nil, errInvalidWord
   389  		}
   390  		n++
   391  	}
   392  
   393  	return dec[:n], nil
   394  }
   395  
   396  // readHexByte returns the byte from its quoted-printable representation.
   397  func readHexByte(a, b byte) (byte, error) {
   398  	var hb, lb byte
   399  	var err error
   400  	if hb, err = fromHex(a); err != nil {
   401  		return 0, err
   402  	}
   403  	if lb, err = fromHex(b); err != nil {
   404  		return 0, err
   405  	}
   406  	return hb<<4 | lb, nil
   407  }
   408  
   409  func fromHex(b byte) (byte, error) {
   410  	switch {
   411  	case b >= '0' && b <= '9':
   412  		return b - '0', nil
   413  	case b >= 'A' && b <= 'F':
   414  		return b - 'A' + 10, nil
   415  	// Accept badly encoded bytes.
   416  	case b >= 'a' && b <= 'f':
   417  		return b - 'a' + 10, nil
   418  	}
   419  	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
   420  }
   421  
   422  func getBuffer() *bytes.Buffer {
   423  	return &bytes.Buffer{}
   424  }
   425  
// putBuffer is the counterpart of getBuffer. It is intentionally a no-op:
// buf is neither reset nor retained.
func putBuffer(buf *bytes.Buffer) {}