// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// A WordEncoder is an RFC 2047 encoded-word encoder.
// Its underlying byte is the encoding letter that is written into every
// encoded-word it produces.
type WordEncoder byte

const (
	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
	BEncoding = WordEncoder('b')
	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
	QEncoding = WordEncoder('q')
)

// errInvalidWord is returned when an encoded-word is malformed or uses an
// encoding other than B or Q.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")

// Encode returns the encoded-word form of s. If s is ASCII without special
// characters, it is returned unchanged. The provided charset is the IANA
// charset name of s. It is case insensitive.
func (e WordEncoder) Encode(charset, s string) string {
	if !needsEncoding(s) {
		return s
	}
	return e.encodeWord(charset, s)
}

// needsEncoding reports whether s contains a rune outside the printable
// ASCII range (' ' through '~') other than a horizontal tab.
func needsEncoding(s string) bool {
	for _, r := range s {
		if (r < ' ' || r > '~') && r != '\t' {
			return true
		}
	}
	return false
}

// encodeWord encodes a string into an encoded-word.
// Any encoding other than BEncoding is written using the Q encoding.
func (e WordEncoder) encodeWord(charset, s string) string {
	var buf strings.Builder
	// Could use a hint like len(s)*3, but that's not enough for cases
	// with word splits and too much for simpler inputs.
	// 48 is close to maxEncodedWordLen/2, but adjusted to allocator size class.
	buf.Grow(48)

	e.openWord(&buf, charset)
	if e == BEncoding {
		e.bEncode(&buf, charset, s)
	} else {
		e.qEncode(&buf, charset, s)
	}
	closeWord(&buf)

	return buf.String()
}

const (
	// The maximum length of an encoded-word is 75 characters.
	// See RFC 2047, section 2.
	maxEncodedWordLen = 75
	// maxContentLen is how much content can be encoded, ignoring the header and
	// 2-byte footer.
	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
)

// maxBase64Len is the largest number of raw bytes whose base64 form still
// fits in maxContentLen characters.
var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)

// bEncode encodes s using base64 encoding and writes it to buf.
// The encoded-word is split only when the charset is UTF-8 and the encoded
// content would exceed maxContentLen.
func (e WordEncoder) bEncode(buf *strings.Builder, charset, s string) {
	// Errors from w are ignored throughout: the underlying strings.Builder
	// never reports a write error.
	w := base64.NewEncoder(base64.StdEncoding, buf)
	// If the charset is not UTF-8 or if the content is short, do not bother
	// splitting the encoded-word.
	if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
		io.WriteString(w, s)
		w.Close()
		return
	}

	var currentLen, last, runeLen int
	for i := 0; i < len(s); i += runeLen {
		// Multi-byte characters must not be split across encoded-words.
		// See RFC 2047, section 5.3.
		_, runeLen = utf8.DecodeRuneInString(s[i:])

		if currentLen+runeLen <= maxBase64Len {
			currentLen += runeLen
		} else {
			// Flush the bytes gathered so far as one complete base64 unit,
			// then start a new encoded-word for the current rune.
			io.WriteString(w, s[last:i])
			w.Close()
			e.splitWord(buf, charset)
			last = i
			currentLen = runeLen
		}
	}
	io.WriteString(w, s[last:])
	w.Close()
}

// qEncode encodes s using Q encoding and writes it to buf. It splits the
// encoded-words when necessary.
func (e WordEncoder) qEncode(buf *strings.Builder, charset, s string) {
	// We only split encoded-words when the charset is UTF-8.
	if !isUTF8(charset) {
		writeQString(buf, s)
		return
	}

	var currentLen, runeLen int
	for i := 0; i < len(s); i += runeLen {
		b := s[i]
		// Multi-byte characters must not be split across encoded-words.
		// See RFC 2047, section 5.3.
		var encLen int
		if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
			// Written as a single character (space becomes '_').
			runeLen, encLen = 1, 1
		} else {
			// Every byte of the rune is written as a three-character "=XX".
			_, runeLen = utf8.DecodeRuneInString(s[i:])
			encLen = 3 * runeLen
		}

		if currentLen+encLen > maxContentLen {
			e.splitWord(buf, charset)
			currentLen = 0
		}
		writeQString(buf, s[i:i+runeLen])
		currentLen += encLen
	}
}

// writeQString encodes s using Q encoding and writes it to buf.
// A space is written as '_'; printable ASCII other than '=', '?' and '_' is
// copied as is; every other byte is written as '=' followed by two
// upper-case hexadecimal digits.
func writeQString(buf *strings.Builder, s string) {
	for i := 0; i < len(s); i++ {
		switch b := s[i]; {
		case b == ' ':
			buf.WriteByte('_')
		case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
			buf.WriteByte(b)
		default:
			buf.WriteByte('=')
			buf.WriteByte(upperhex[b>>4])
			buf.WriteByte(upperhex[b&0x0f])
		}
	}
}

// openWord writes the beginning of an encoded-word into buf.
// The result has the form "=?charset?e?", where e is the encoder's letter.
func (e WordEncoder) openWord(buf *strings.Builder, charset string) {
	buf.WriteString("=?")
	buf.WriteString(charset)
	buf.WriteByte('?')
	buf.WriteByte(byte(e))
	buf.WriteByte('?')
}

// closeWord writes the end of an encoded-word into buf.
func closeWord(buf *strings.Builder) {
	buf.WriteString("?=")
}

// splitWord closes the current encoded-word and opens a new one.
// The two encoded-words are separated by a single space.
func (e WordEncoder) splitWord(buf *strings.Builder, charset string) {
	closeWord(buf)
	buf.WriteByte(' ')
	e.openWord(buf, charset)
}

// isUTF8 reports whether charset names UTF-8, ignoring case.
func isUTF8(charset string) bool {
	return strings.EqualFold(charset, "UTF-8")
}

// upperhex maps a 4-bit value to its upper-case hexadecimal digit.
const upperhex = "0123456789ABCDEF"

// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
	// are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}

// Decode decodes an RFC 2047 encoded-word.
// It returns an error if word is not a single well-formed encoded-word, if
// its content cannot be decoded, or if its charset cannot be converted.
func (d *WordDecoder) Decode(word string) (string, error) {
	// See https://tools.ietf.org/html/rfc2047#section-2 for details.
	// Our decoder is permissive, we accept empty encoded-text.
	if len(word) < 8 || !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
		return "", errInvalidWord
	}
	word = word[2 : len(word)-2]

	// split delimits the first 2 fields
	split := strings.IndexByte(word, '?')

	// split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
	charset := word[:split]
	if len(charset) == 0 {
		return "", errInvalidWord
	}
	if len(word) < split+3 {
		return "", errInvalidWord
	}
	encoding := word[split+1]
	// the field after split must only be one byte
	if word[split+2] != '?' {
		return "", errInvalidWord
	}
	text := word[split+3:]

	content, err := decode(encoding, text)
	if err != nil {
		return "", err
	}

	var buf strings.Builder

	if err := d.convert(&buf, charset, content); err != nil {
		return "", err
	}

	return buf.String(), nil
}

// DecodeHeader decodes all encoded-words of the given string. It returns an
// error if and only if CharsetReader of d returns an error.
func (d *WordDecoder) DecodeHeader(header string) (string, error) {
	// If there is no encoded-word, returns before creating a buffer.
	i := strings.Index(header, "=?")
	if i == -1 {
		return header, nil
	}

	var buf strings.Builder

	buf.WriteString(header[:i])
	header = header[i:]

	// betweenWords is true when the previous token written was a
	// successfully decoded encoded-word.
	betweenWords := false
	for {
		start := strings.Index(header, "=?")
		if start == -1 {
			break
		}
		cur := start + len("=?")

		i := strings.Index(header[cur:], "?")
		if i == -1 {
			break
		}
		charset := header[cur : cur+i]
		cur += i + len("?")

		// There must be room left for the encoding letter, the two
		// remaining '?' and the closing '='.
		if len(header) < cur+len("Q??=") {
			break
		}
		encoding := header[cur]
		cur++

		if header[cur] != '?' {
			break
		}
		cur++

		j := strings.Index(header[cur:], "?=")
		if j == -1 {
			break
		}
		text := header[cur : cur+j]
		end := cur + j + len("?=")

		content, err := decode(encoding, text)
		if err != nil {
			// Not a valid encoded-word: copy everything up to and including
			// this "=?" verbatim and resume scanning right after it.
			betweenWords = false
			buf.WriteString(header[:start+2])
			header = header[start+2:]
			continue
		}

		// Write characters before the encoded-word. White-space and newline
		// characters separating two encoded-words must be deleted.
		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
			buf.WriteString(header[:start])
		}

		if err := d.convert(&buf, charset, content); err != nil {
			return "", err
		}

		header = header[end:]
		betweenWords = true
	}

	if len(header) > 0 {
		buf.WriteString(header)
	}

	return buf.String(), nil
}

// decode decodes text according to the encoding letter: 'B' or 'b' selects
// base64 and 'Q' or 'q' selects the Q encoding. Any other letter yields
// errInvalidWord.
func decode(encoding byte, text string) ([]byte, error) {
	switch encoding {
	case 'B', 'b':
		return base64.StdEncoding.DecodeString(text)
	case 'Q', 'q':
		return qDecode(text)
	default:
		return nil, errInvalidWord
	}
}

// convert writes content, interpreted in the given charset, to buf as UTF-8.
// utf-8, iso-8859-1 and us-ascii are handled directly (case-insensitively);
// any other charset is delegated to d.CharsetReader, and is an error when
// CharsetReader is nil.
func (d *WordDecoder) convert(buf *strings.Builder, charset string, content []byte) error {
	switch {
	case strings.EqualFold("utf-8", charset):
		buf.Write(content)
	case strings.EqualFold("iso-8859-1", charset):
		// Each ISO-8859-1 byte is the Unicode code point of the same value.
		for _, c := range content {
			buf.WriteRune(rune(c))
		}
	case strings.EqualFold("us-ascii", charset):
		for _, c := range content {
			if c >= utf8.RuneSelf {
				buf.WriteRune(unicode.ReplacementChar)
			} else {
				buf.WriteByte(c)
			}
		}
	default:
		if d.CharsetReader == nil {
			return fmt.Errorf("mime: unhandled charset %q", charset)
		}
		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
		if err != nil {
			return err
		}
		if _, err = io.Copy(buf, r); err != nil {
			return err
		}
	}
	return nil
}

// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
// one byte of non-whitespace.
func hasNonWhitespace(s string) bool {
	for _, r := range s {
		switch r {
		// Encoded-words can only be separated by linear white spaces which does
		// not include vertical tabs (\v).
		case ' ', '\t', '\n', '\r':
		default:
			return true
		}
	}
	return false
}

// qDecode decodes a Q encoded string.
// It returns errInvalidWord for a truncated "=XX" escape or for a byte that
// is neither printable ASCII nor one of '\n', '\r', '\t'.
func qDecode(s string) ([]byte, error) {
	// The decoded form is never longer than the encoded form.
	dec := make([]byte, len(s))
	n := 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '_':
			dec[n] = ' '
		case c == '=':
			if i+2 >= len(s) {
				return nil, errInvalidWord
			}
			b, err := readHexByte(s[i+1], s[i+2])
			if err != nil {
				return nil, err
			}
			dec[n] = b
			i += 2
		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
			dec[n] = c
		default:
			return nil, errInvalidWord
		}
		n++
	}

	return dec[:n], nil
}

// readHexByte returns the byte from its quoted-printable representation.
// a is the high hexadecimal digit and b the low one.
func readHexByte(a, b byte) (byte, error) {
	var hb, lb byte
	var err error
	if hb, err = fromHex(a); err != nil {
		return 0, err
	}
	if lb, err = fromHex(b); err != nil {
		return 0, err
	}
	return hb<<4 | lb, nil
}

// fromHex returns the value of the hexadecimal digit b, or an error if b is
// not a hexadecimal digit.
func fromHex(b byte) (byte, error) {
	switch {
	case b >= '0' && b <= '9':
		return b - '0', nil
	case b >= 'A' && b <= 'F':
		return b - 'A' + 10, nil
	// Accept badly encoded bytes.
	case b >= 'a' && b <= 'f':
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}