github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/resource/api/internal/mime/encodedword.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //+build !go1.6 6 7 // TODO(natefinch) remove this once we support building on go 1.6 for all platforms. 8 // This code was copied from the Go 1.6 sourcecode. 9 10 package mime 11 12 import ( 13 "bytes" 14 "encoding/base64" 15 "errors" 16 "fmt" 17 "io" 18 "strings" 19 "unicode" 20 "unicode/utf8" 21 ) 22 23 // A WordEncoder is a RFC 2047 encoded-word encoder. 24 type WordEncoder byte 25 26 const ( 27 // BEncoding represents Base64 encoding scheme as defined by RFC 2045. 28 BEncoding = WordEncoder('b') 29 // QEncoding represents the Q-encoding scheme as defined by RFC 2047. 30 QEncoding = WordEncoder('q') 31 ) 32 33 var ( 34 errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word") 35 ) 36 37 // Encode returns the encoded-word form of s. If s is ASCII without special 38 // characters, it is returned unchanged. The provided charset is the IANA 39 // charset name of s. It is case insensitive. 40 func (e WordEncoder) Encode(charset, s string) string { 41 if !needsEncoding(s) { 42 return s 43 } 44 return e.encodeWord(charset, s) 45 } 46 47 func needsEncoding(s string) bool { 48 for _, b := range s { 49 if (b < ' ' || b > '~') && b != '\t' { 50 return true 51 } 52 } 53 return false 54 } 55 56 // encodeWord encodes a string into an encoded-word. 57 func (e WordEncoder) encodeWord(charset, s string) string { 58 buf := getBuffer() 59 defer putBuffer(buf) 60 61 e.openWord(buf, charset) 62 if e == BEncoding { 63 e.bEncode(buf, charset, s) 64 } else { 65 e.qEncode(buf, charset, s) 66 } 67 closeWord(buf) 68 69 return buf.String() 70 } 71 72 const ( 73 // The maximum length of an encoded-word is 75 characters. 74 // See RFC 2047, section 2. 75 maxEncodedWordLen = 75 76 // maxContentLen is how much content can be encoded, ignoring the header and 77 // 2-byte footer. 78 maxContentLen = maxEncodedWordLen - len("=?UTF-8?") - len("?=") 79 ) 80 81 var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen) 82 83 // bEncode encodes s using base64 encoding and writes it to buf. 84 func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) { 85 w := base64.NewEncoder(base64.StdEncoding, buf) 86 // If the charset is not UTF-8 or if the content is short, do not bother 87 // splitting the encoded-word. 88 if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen { 89 io.WriteString(w, s) 90 w.Close() 91 return 92 } 93 94 var currentLen, last, runeLen int 95 for i := 0; i < len(s); i += runeLen { 96 // Multi-byte characters must not be split across encoded-words. 97 // See RFC 2047, section 5.3. 98 _, runeLen = utf8.DecodeRuneInString(s[i:]) 99 100 if currentLen+runeLen <= maxBase64Len { 101 currentLen += runeLen 102 } else { 103 io.WriteString(w, s[last:i]) 104 w.Close() 105 e.splitWord(buf, charset) 106 last = i 107 currentLen = runeLen 108 } 109 } 110 io.WriteString(w, s[last:]) 111 w.Close() 112 } 113 114 // qEncode encodes s using Q encoding and writes it to buf. It splits the 115 // encoded-words when necessary. 116 func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) { 117 // We only split encoded-words when the charset is UTF-8. 118 if !isUTF8(charset) { 119 writeQString(buf, s) 120 return 121 } 122 123 var currentLen, runeLen int 124 for i := 0; i < len(s); i += runeLen { 125 b := s[i] 126 // Multi-byte characters must not be split across encoded-words. 127 // See RFC 2047, section 5.3. 128 var encLen int 129 if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' { 130 runeLen, encLen = 1, 1 131 } else { 132 _, runeLen = utf8.DecodeRuneInString(s[i:]) 133 encLen = 3 * runeLen 134 } 135 136 if currentLen+encLen > maxContentLen { 137 e.splitWord(buf, charset) 138 currentLen = 0 139 } 140 writeQString(buf, s[i:i+runeLen]) 141 currentLen += encLen 142 } 143 } 144 145 // writeQString encodes s using Q encoding and writes it to buf. 146 func writeQString(buf *bytes.Buffer, s string) { 147 for i := 0; i < len(s); i++ { 148 switch b := s[i]; { 149 case b == ' ': 150 buf.WriteByte('_') 151 case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_': 152 buf.WriteByte(b) 153 default: 154 buf.WriteByte('=') 155 buf.WriteByte(upperhex[b>>4]) 156 buf.WriteByte(upperhex[b&0x0f]) 157 } 158 } 159 } 160 161 // openWord writes the beginning of an encoded-word into buf. 162 func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) { 163 buf.WriteString("=?") 164 buf.WriteString(charset) 165 buf.WriteByte('?') 166 buf.WriteByte(byte(e)) 167 buf.WriteByte('?') 168 } 169 170 // closeWord writes the end of an encoded-word into buf. 171 func closeWord(buf *bytes.Buffer) { 172 buf.WriteString("?=") 173 } 174 175 // splitWord closes the current encoded-word and opens a new one. 176 func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) { 177 closeWord(buf) 178 buf.WriteByte(' ') 179 e.openWord(buf, charset) 180 } 181 182 func isUTF8(charset string) bool { 183 return strings.EqualFold(charset, "UTF-8") 184 } 185 186 const upperhex = "0123456789ABCDEF" 187 188 // A WordDecoder decodes MIME headers containing RFC 2047 encoded-words. 189 type WordDecoder struct { 190 // CharsetReader, if non-nil, defines a function to generate 191 // charset-conversion readers, converting from the provided 192 // charset into UTF-8. 193 // Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets 194 // are handled by default. 195 // One of the the CharsetReader's result values must be non-nil. 196 CharsetReader func(charset string, input io.Reader) (io.Reader, error) 197 } 198 199 // Decode decodes an RFC 2047 encoded-word. 200 func (d *WordDecoder) Decode(word string) (string, error) { 201 if !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 { 202 return "", errInvalidWord 203 } 204 word = word[2 : len(word)-2] 205 206 // split delimits the first 2 fields 207 split := strings.IndexByte(word, '?') 208 // the field after split must only be one byte 209 if word[split+2] != '?' { 210 return "", errInvalidWord 211 } 212 213 // split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii" 214 charset := word[:split] 215 encoding := word[split+1] 216 text := word[split+3:] 217 218 content, err := decode(encoding, text) 219 if err != nil { 220 return "", err 221 } 222 223 buf := getBuffer() 224 defer putBuffer(buf) 225 226 if err := d.convert(buf, charset, content); err != nil { 227 return "", err 228 } 229 230 return buf.String(), nil 231 } 232 233 // DecodeHeader decodes all encoded-words of the given string. It returns an 234 // error if and only if CharsetReader of d returns an error. 235 func (d *WordDecoder) DecodeHeader(header string) (string, error) { 236 // If there is no encoded-word, returns before creating a buffer. 237 i := strings.Index(header, "=?") 238 if i == -1 { 239 return header, nil 240 } 241 242 buf := getBuffer() 243 defer putBuffer(buf) 244 245 buf.WriteString(header[:i]) 246 header = header[i:] 247 248 betweenWords := false 249 for { 250 start := strings.Index(header, "=?") 251 if start == -1 { 252 break 253 } 254 cur := start + len("=?") 255 256 i := strings.Index(header[cur:], "?") 257 if i == -1 { 258 break 259 } 260 charset := header[cur : cur+i] 261 cur += i + len("?") 262 263 if len(header) < cur+len("Q??=") { 264 break 265 } 266 encoding := header[cur] 267 cur++ 268 269 if header[cur] != '?' { 270 break 271 } 272 cur++ 273 274 j := strings.Index(header[cur:], "?=") 275 if j == -1 { 276 break 277 } 278 text := header[cur : cur+j] 279 end := cur + j + len("?=") 280 281 content, err := decode(encoding, text) 282 if err != nil { 283 betweenWords = false 284 buf.WriteString(header[:start+2]) 285 header = header[start+2:] 286 continue 287 } 288 289 // Write characters before the encoded-word. White-space and newline 290 // characters separating two encoded-words must be deleted. 291 if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) { 292 buf.WriteString(header[:start]) 293 } 294 295 if err := d.convert(buf, charset, content); err != nil { 296 return "", err 297 } 298 299 header = header[end:] 300 betweenWords = true 301 } 302 303 if len(header) > 0 { 304 buf.WriteString(header) 305 } 306 307 return buf.String(), nil 308 } 309 310 func decode(encoding byte, text string) ([]byte, error) { 311 switch encoding { 312 case 'B', 'b': 313 return base64.StdEncoding.DecodeString(text) 314 case 'Q', 'q': 315 return qDecode(text) 316 default: 317 return nil, errInvalidWord 318 } 319 } 320 321 func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error { 322 switch { 323 case strings.EqualFold("utf-8", charset): 324 buf.Write(content) 325 case strings.EqualFold("iso-8859-1", charset): 326 for _, c := range content { 327 buf.WriteRune(rune(c)) 328 } 329 case strings.EqualFold("us-ascii", charset): 330 for _, c := range content { 331 if c >= utf8.RuneSelf { 332 buf.WriteRune(unicode.ReplacementChar) 333 } else { 334 buf.WriteByte(c) 335 } 336 } 337 default: 338 if d.CharsetReader == nil { 339 return fmt.Errorf("mime: unhandled charset %q", charset) 340 } 341 r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content)) 342 if err != nil { 343 return err 344 } 345 if _, err = buf.ReadFrom(r); err != nil { 346 return err 347 } 348 } 349 return nil 350 } 351 352 // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least 353 // one byte of non-whitespace. 354 func hasNonWhitespace(s string) bool { 355 for _, b := range s { 356 switch b { 357 // Encoded-words can only be separated by linear white spaces which does 358 // not include vertical tabs (\v). 359 case ' ', '\t', '\n', '\r': 360 default: 361 return true 362 } 363 } 364 return false 365 } 366 367 // qDecode decodes a Q encoded string. 368 func qDecode(s string) ([]byte, error) { 369 dec := make([]byte, len(s)) 370 n := 0 371 for i := 0; i < len(s); i++ { 372 switch c := s[i]; { 373 case c == '_': 374 dec[n] = ' ' 375 case c == '=': 376 if i+2 >= len(s) { 377 return nil, errInvalidWord 378 } 379 b, err := readHexByte(s[i+1], s[i+2]) 380 if err != nil { 381 return nil, err 382 } 383 dec[n] = b 384 i += 2 385 case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t': 386 dec[n] = c 387 default: 388 return nil, errInvalidWord 389 } 390 n++ 391 } 392 393 return dec[:n], nil 394 } 395 396 // readHexByte returns the byte from its quoted-printable representation. 397 func readHexByte(a, b byte) (byte, error) { 398 var hb, lb byte 399 var err error 400 if hb, err = fromHex(a); err != nil { 401 return 0, err 402 } 403 if lb, err = fromHex(b); err != nil { 404 return 0, err 405 } 406 return hb<<4 | lb, nil 407 } 408 409 func fromHex(b byte) (byte, error) { 410 switch { 411 case b >= '0' && b <= '9': 412 return b - '0', nil 413 case b >= 'A' && b <= 'F': 414 return b - 'A' + 10, nil 415 // Accept badly encoded bytes. 416 case b >= 'a' && b <= 'f': 417 return b - 'a' + 10, nil 418 } 419 return 0, fmt.Errorf("mime: invalid hex byte %#02x", b) 420 } 421 422 func getBuffer() *bytes.Buffer { 423 return &bytes.Buffer{} 424 } 425 426 func putBuffer(buf *bytes.Buffer) {}