github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/mime/encodedword.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package mime 6 7 import ( 8 "bytes" 9 "encoding/base64" 10 "errors" 11 "fmt" 12 "io" 13 "strings" 14 "sync" 15 "unicode" 16 "unicode/utf8" 17 ) 18 19 // A WordEncoder is a RFC 2047 encoded-word encoder. 20 type WordEncoder byte 21 22 const ( 23 // BEncoding represents Base64 encoding scheme as defined by RFC 2045. 24 BEncoding = WordEncoder('b') 25 // QEncoding represents the Q-encoding scheme as defined by RFC 2047. 26 QEncoding = WordEncoder('q') 27 ) 28 29 var ( 30 errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word") 31 ) 32 33 // Encode returns the encoded-word form of s. If s is ASCII without special 34 // characters, it is returned unchanged. The provided charset is the IANA 35 // charset name of s. It is case insensitive. 36 func (e WordEncoder) Encode(charset, s string) string { 37 if !needsEncoding(s) { 38 return s 39 } 40 return e.encodeWord(charset, s) 41 } 42 43 func needsEncoding(s string) bool { 44 for _, b := range s { 45 if (b < ' ' || b > '~') && b != '\t' { 46 return true 47 } 48 } 49 return false 50 } 51 52 // encodeWord encodes a string into an encoded-word. 53 func (e WordEncoder) encodeWord(charset, s string) string { 54 buf := getBuffer() 55 defer putBuffer(buf) 56 57 e.openWord(buf, charset) 58 if e == BEncoding { 59 e.bEncode(buf, charset, s) 60 } else { 61 e.qEncode(buf, charset, s) 62 } 63 closeWord(buf) 64 65 return buf.String() 66 } 67 68 const ( 69 // The maximum length of an encoded-word is 75 characters. 70 // See RFC 2047, section 2. 71 maxEncodedWordLen = 75 72 // maxContentLen is how much content can be encoded, ignoring the header and 73 // 2-byte footer. 74 maxContentLen = maxEncodedWordLen - len("=?UTF-8?") - len("?=") 75 ) 76 77 var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen) 78 79 // bEncode encodes s using base64 encoding and writes it to buf. 80 func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) { 81 w := base64.NewEncoder(base64.StdEncoding, buf) 82 // If the charset is not UTF-8 or if the content is short, do not bother 83 // splitting the encoded-word. 84 if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen { 85 io.WriteString(w, s) 86 w.Close() 87 return 88 } 89 90 var currentLen, last, runeLen int 91 for i := 0; i < len(s); i += runeLen { 92 // Multi-byte characters must not be split accross encoded-words. 93 // See RFC 2047, section 5.3. 94 _, runeLen = utf8.DecodeRuneInString(s[i:]) 95 96 if currentLen+runeLen <= maxBase64Len { 97 currentLen += runeLen 98 } else { 99 io.WriteString(w, s[last:i]) 100 w.Close() 101 e.splitWord(buf, charset) 102 last = i 103 currentLen = runeLen 104 } 105 } 106 io.WriteString(w, s[last:]) 107 w.Close() 108 } 109 110 // qEncode encodes s using Q encoding and writes it to buf. It splits the 111 // encoded-words when necessary. 112 func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) { 113 // We only split encoded-words when the charset is UTF-8. 114 if !isUTF8(charset) { 115 writeQString(buf, s) 116 return 117 } 118 119 var currentLen, runeLen int 120 for i := 0; i < len(s); i += runeLen { 121 b := s[i] 122 // Multi-byte characters must not be split accross encoded-words. 123 // See RFC 2047, section 5.3. 124 var encLen int 125 if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' { 126 runeLen, encLen = 1, 1 127 } else { 128 _, runeLen = utf8.DecodeRuneInString(s[i:]) 129 encLen = 3 * runeLen 130 } 131 132 if currentLen+encLen > maxContentLen { 133 e.splitWord(buf, charset) 134 currentLen = 0 135 } 136 writeQString(buf, s[i:i+runeLen]) 137 currentLen += encLen 138 } 139 } 140 141 // writeQString encodes s using Q encoding and writes it to buf. 142 func writeQString(buf *bytes.Buffer, s string) { 143 for i := 0; i < len(s); i++ { 144 switch b := s[i]; { 145 case b == ' ': 146 buf.WriteByte('_') 147 case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_': 148 buf.WriteByte(b) 149 default: 150 buf.WriteByte('=') 151 buf.WriteByte(upperhex[b>>4]) 152 buf.WriteByte(upperhex[b&0x0f]) 153 } 154 } 155 } 156 157 // openWord writes the beginning of an encoded-word into buf. 158 func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) { 159 buf.WriteString("=?") 160 buf.WriteString(charset) 161 buf.WriteByte('?') 162 buf.WriteByte(byte(e)) 163 buf.WriteByte('?') 164 } 165 166 // closeWord writes the end of an encoded-word into buf. 167 func closeWord(buf *bytes.Buffer) { 168 buf.WriteString("?=") 169 } 170 171 // splitWord closes the current encoded-word and opens a new one. 172 func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) { 173 closeWord(buf) 174 buf.WriteByte(' ') 175 e.openWord(buf, charset) 176 } 177 178 func isUTF8(charset string) bool { 179 return strings.EqualFold(charset, "UTF-8") 180 } 181 182 const upperhex = "0123456789ABCDEF" 183 184 // A WordDecoder decodes MIME headers containing RFC 2047 encoded-words. 185 type WordDecoder struct { 186 // CharsetReader, if non-nil, defines a function to generate 187 // charset-conversion readers, converting from the provided 188 // charset into UTF-8. 189 // Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets 190 // are handled by default. 191 // One of the the CharsetReader's result values must be non-nil. 192 CharsetReader func(charset string, input io.Reader) (io.Reader, error) 193 } 194 195 // Decode decodes an RFC 2047 encoded-word. 196 func (d *WordDecoder) Decode(word string) (string, error) { 197 if !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 { 198 return "", errInvalidWord 199 } 200 word = word[2 : len(word)-2] 201 202 // split delimits the first 2 fields 203 split := strings.IndexByte(word, '?') 204 // the field after split must only be one byte 205 if word[split+2] != '?' { 206 return "", errInvalidWord 207 } 208 209 // split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii" 210 charset := word[:split] 211 encoding := word[split+1] 212 text := word[split+3:] 213 214 content, err := decode(encoding, text) 215 if err != nil { 216 return "", err 217 } 218 219 buf := getBuffer() 220 defer putBuffer(buf) 221 222 if err := d.convert(buf, charset, content); err != nil { 223 return "", err 224 } 225 226 return buf.String(), nil 227 } 228 229 // DecodeHeader decodes all encoded-words of the given string. It returns an 230 // error if and only if CharsetReader of d returns an error. 231 func (d *WordDecoder) DecodeHeader(header string) (string, error) { 232 // If there is no encoded-word, returns before creating a buffer. 233 i := strings.Index(header, "=?") 234 if i == -1 { 235 return header, nil 236 } 237 238 buf := getBuffer() 239 defer putBuffer(buf) 240 241 buf.WriteString(header[:i]) 242 header = header[i:] 243 244 betweenWords := false 245 for { 246 start := strings.Index(header, "=?") 247 if start == -1 { 248 break 249 } 250 cur := start + len("=?") 251 252 i := strings.Index(header[cur:], "?") 253 if i == -1 { 254 break 255 } 256 charset := header[cur : cur+i] 257 cur += i + len("?") 258 259 if len(header) < cur+len("Q??=") { 260 break 261 } 262 encoding := header[cur] 263 cur++ 264 265 if header[cur] != '?' { 266 break 267 } 268 cur++ 269 270 j := strings.Index(header[cur:], "?=") 271 if j == -1 { 272 break 273 } 274 text := header[cur : cur+j] 275 end := cur + j + len("?=") 276 277 content, err := decode(encoding, text) 278 if err != nil { 279 betweenWords = false 280 buf.WriteString(header[:start+2]) 281 header = header[start+2:] 282 continue 283 } 284 285 // Write characters before the encoded-word. White-space and newline 286 // characters separating two encoded-words must be deleted. 287 if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) { 288 buf.WriteString(header[:start]) 289 } 290 291 if err := d.convert(buf, charset, content); err != nil { 292 return "", err 293 } 294 295 header = header[end:] 296 betweenWords = true 297 } 298 299 if len(header) > 0 { 300 buf.WriteString(header) 301 } 302 303 return buf.String(), nil 304 } 305 306 func decode(encoding byte, text string) ([]byte, error) { 307 switch encoding { 308 case 'B', 'b': 309 return base64.StdEncoding.DecodeString(text) 310 case 'Q', 'q': 311 return qDecode(text) 312 default: 313 return nil, errInvalidWord 314 } 315 } 316 317 func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error { 318 switch { 319 case strings.EqualFold("utf-8", charset): 320 buf.Write(content) 321 case strings.EqualFold("iso-8859-1", charset): 322 for _, c := range content { 323 buf.WriteRune(rune(c)) 324 } 325 case strings.EqualFold("us-ascii", charset): 326 for _, c := range content { 327 if c >= utf8.RuneSelf { 328 buf.WriteRune(unicode.ReplacementChar) 329 } else { 330 buf.WriteByte(c) 331 } 332 } 333 default: 334 if d.CharsetReader == nil { 335 return fmt.Errorf("mime: unhandled charset %q", charset) 336 } 337 r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content)) 338 if err != nil { 339 return err 340 } 341 if _, err = buf.ReadFrom(r); err != nil { 342 return err 343 } 344 } 345 return nil 346 } 347 348 // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least 349 // one byte of non-whitespace. 350 func hasNonWhitespace(s string) bool { 351 for _, b := range s { 352 switch b { 353 // Encoded-words can only be separated by linear white spaces which does 354 // not include vertical tabs (\v). 355 case ' ', '\t', '\n', '\r': 356 default: 357 return true 358 } 359 } 360 return false 361 } 362 363 // qDecode decodes a Q encoded string. 364 func qDecode(s string) ([]byte, error) { 365 dec := make([]byte, len(s)) 366 n := 0 367 for i := 0; i < len(s); i++ { 368 switch c := s[i]; { 369 case c == '_': 370 dec[n] = ' ' 371 case c == '=': 372 if i+2 >= len(s) { 373 return nil, errInvalidWord 374 } 375 b, err := readHexByte(s[i+1], s[i+2]) 376 if err != nil { 377 return nil, err 378 } 379 dec[n] = b 380 i += 2 381 case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t': 382 dec[n] = c 383 default: 384 return nil, errInvalidWord 385 } 386 n++ 387 } 388 389 return dec[:n], nil 390 } 391 392 // readHexByte returns the byte from its quoted-printable representation. 393 func readHexByte(a, b byte) (byte, error) { 394 var hb, lb byte 395 var err error 396 if hb, err = fromHex(a); err != nil { 397 return 0, err 398 } 399 if lb, err = fromHex(b); err != nil { 400 return 0, err 401 } 402 return hb<<4 | lb, nil 403 } 404 405 func fromHex(b byte) (byte, error) { 406 switch { 407 case b >= '0' && b <= '9': 408 return b - '0', nil 409 case b >= 'A' && b <= 'F': 410 return b - 'A' + 10, nil 411 // Accept badly encoded bytes. 412 case b >= 'a' && b <= 'f': 413 return b - 'a' + 10, nil 414 } 415 return 0, fmt.Errorf("mime: invalid hex byte %#02x", b) 416 } 417 418 var bufPool = sync.Pool{ 419 New: func() interface{} { 420 return new(bytes.Buffer) 421 }, 422 } 423 424 func getBuffer() *bytes.Buffer { 425 return bufPool.Get().(*bytes.Buffer) 426 } 427 428 func putBuffer(buf *bytes.Buffer) { 429 if buf.Len() > 1024 { 430 return 431 } 432 buf.Reset() 433 bufPool.Put(buf) 434 }