github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/mime/encodedword.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package mime 6 7 import ( 8 "bytes" 9 "encoding/base64" 10 "errors" 11 "fmt" 12 "io" 13 "strings" 14 "sync" 15 "unicode" 16 "unicode/utf8" 17 ) 18 19 // A WordEncoder is a RFC 2047 encoded-word encoder. 20 type WordEncoder byte 21 22 const ( 23 // BEncoding represents Base64 encoding scheme as defined by RFC 2045. 24 BEncoding = WordEncoder('b') 25 // QEncoding represents the Q-encoding scheme as defined by RFC 2047. 26 QEncoding = WordEncoder('q') 27 ) 28 29 var ( 30 errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word") 31 ) 32 33 // Encode returns the encoded-word form of s. If s is ASCII without special 34 // characters, it is returned unchanged. The provided charset is the IANA 35 // charset name of s. It is case insensitive. 36 func (e WordEncoder) Encode(charset, s string) string { 37 if !needsEncoding(s) { 38 return s 39 } 40 return e.encodeWord(charset, s) 41 } 42 43 func needsEncoding(s string) bool { 44 for _, b := range s { 45 if (b < ' ' || b > '~') && b != '\t' { 46 return true 47 } 48 } 49 return false 50 } 51 52 // encodeWord encodes a string into an encoded-word. 53 func (e WordEncoder) encodeWord(charset, s string) string { 54 buf := getBuffer() 55 defer putBuffer(buf) 56 57 buf.WriteString("=?") 58 buf.WriteString(charset) 59 buf.WriteByte('?') 60 buf.WriteByte(byte(e)) 61 buf.WriteByte('?') 62 63 if e == BEncoding { 64 w := base64.NewEncoder(base64.StdEncoding, buf) 65 io.WriteString(w, s) 66 w.Close() 67 } else { 68 enc := make([]byte, 3) 69 for i := 0; i < len(s); i++ { 70 b := s[i] 71 switch { 72 case b == ' ': 73 buf.WriteByte('_') 74 case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_': 75 buf.WriteByte(b) 76 default: 77 enc[0] = '=' 78 enc[1] = upperhex[b>>4] 79 enc[2] = upperhex[b&0x0f] 80 buf.Write(enc) 81 } 82 } 83 } 84 buf.WriteString("?=") 85 return buf.String() 86 } 87 88 const upperhex = "0123456789ABCDEF" 89 90 // A WordDecoder decodes MIME headers containing RFC 2047 encoded-words. 91 type WordDecoder struct { 92 // CharsetReader, if non-nil, defines a function to generate 93 // charset-conversion readers, converting from the provided 94 // charset into UTF-8. 95 // Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets 96 // are handled by default. 97 // One of the the CharsetReader's result values must be non-nil. 98 CharsetReader func(charset string, input io.Reader) (io.Reader, error) 99 } 100 101 // Decode decodes an RFC 2047 encoded-word. 102 func (d *WordDecoder) Decode(word string) (string, error) { 103 fields := strings.Split(word, "?") // TODO: remove allocation? 104 if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 { 105 return "", errInvalidWord 106 } 107 108 content, err := decode(fields[2][0], fields[3]) 109 if err != nil { 110 return "", err 111 } 112 113 buf := getBuffer() 114 defer putBuffer(buf) 115 116 if err := d.convert(buf, fields[1], content); err != nil { 117 return "", err 118 } 119 120 return buf.String(), nil 121 } 122 123 // DecodeHeader decodes all encoded-words of the given string. It returns an 124 // error if and only if CharsetReader of d returns an error. 125 func (d *WordDecoder) DecodeHeader(header string) (string, error) { 126 // If there is no encoded-word, returns before creating a buffer. 127 i := strings.Index(header, "=?") 128 if i == -1 { 129 return header, nil 130 } 131 132 buf := getBuffer() 133 defer putBuffer(buf) 134 135 buf.WriteString(header[:i]) 136 header = header[i:] 137 138 betweenWords := false 139 for { 140 start := strings.Index(header, "=?") 141 if start == -1 { 142 break 143 } 144 cur := start + len("=?") 145 146 i := strings.Index(header[cur:], "?") 147 if i == -1 { 148 break 149 } 150 charset := header[cur : cur+i] 151 cur += i + len("?") 152 153 if len(header) < cur+len("Q??=") { 154 break 155 } 156 encoding := header[cur] 157 cur++ 158 159 if header[cur] != '?' { 160 break 161 } 162 cur++ 163 164 j := strings.Index(header[cur:], "?=") 165 if j == -1 { 166 break 167 } 168 text := header[cur : cur+j] 169 end := cur + j + len("?=") 170 171 content, err := decode(encoding, text) 172 if err != nil { 173 betweenWords = false 174 buf.WriteString(header[:start+2]) 175 header = header[start+2:] 176 continue 177 } 178 179 // Write characters before the encoded-word. White-space and newline 180 // characters separating two encoded-words must be deleted. 181 if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) { 182 buf.WriteString(header[:start]) 183 } 184 185 if err := d.convert(buf, charset, content); err != nil { 186 return "", err 187 } 188 189 header = header[end:] 190 betweenWords = true 191 } 192 193 if len(header) > 0 { 194 buf.WriteString(header) 195 } 196 197 return buf.String(), nil 198 } 199 200 func decode(encoding byte, text string) ([]byte, error) { 201 switch encoding { 202 case 'B', 'b': 203 return base64.StdEncoding.DecodeString(text) 204 case 'Q', 'q': 205 return qDecode(text) 206 default: 207 return nil, errInvalidWord 208 } 209 } 210 211 func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error { 212 switch { 213 case strings.EqualFold("utf-8", charset): 214 buf.Write(content) 215 case strings.EqualFold("iso-8859-1", charset): 216 for _, c := range content { 217 buf.WriteRune(rune(c)) 218 } 219 case strings.EqualFold("us-ascii", charset): 220 for _, c := range content { 221 if c >= utf8.RuneSelf { 222 buf.WriteRune(unicode.ReplacementChar) 223 } else { 224 buf.WriteByte(c) 225 } 226 } 227 default: 228 if d.CharsetReader == nil { 229 return fmt.Errorf("mime: unhandled charset %q", charset) 230 } 231 r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content)) 232 if err != nil { 233 return err 234 } 235 if _, err = buf.ReadFrom(r); err != nil { 236 return err 237 } 238 } 239 return nil 240 } 241 242 // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least 243 // one byte of non-whitespace. 244 func hasNonWhitespace(s string) bool { 245 for _, b := range s { 246 switch b { 247 // Encoded-words can only be separated by linear white spaces which does 248 // not include vertical tabs (\v). 249 case ' ', '\t', '\n', '\r': 250 default: 251 return true 252 } 253 } 254 return false 255 } 256 257 // qDecode decodes a Q encoded string. 258 func qDecode(s string) ([]byte, error) { 259 dec := make([]byte, len(s)) 260 n := 0 261 for i := 0; i < len(s); i++ { 262 switch c := s[i]; { 263 case c == '_': 264 dec[n] = ' ' 265 case c == '=': 266 if i+2 >= len(s) { 267 return nil, errInvalidWord 268 } 269 b, err := readHexByte(s[i+1], s[i+2]) 270 if err != nil { 271 return nil, err 272 } 273 dec[n] = b 274 i += 2 275 case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t': 276 dec[n] = c 277 default: 278 return nil, errInvalidWord 279 } 280 n++ 281 } 282 283 return dec[:n], nil 284 } 285 286 // readHexByte returns the byte from its quoted-printable representation. 287 func readHexByte(a, b byte) (byte, error) { 288 var hb, lb byte 289 var err error 290 if hb, err = fromHex(a); err != nil { 291 return 0, err 292 } 293 if lb, err = fromHex(b); err != nil { 294 return 0, err 295 } 296 return hb<<4 | lb, nil 297 } 298 299 func fromHex(b byte) (byte, error) { 300 switch { 301 case b >= '0' && b <= '9': 302 return b - '0', nil 303 case b >= 'A' && b <= 'F': 304 return b - 'A' + 10, nil 305 // Accept badly encoded bytes. 306 case b >= 'a' && b <= 'f': 307 return b - 'a' + 10, nil 308 } 309 return 0, fmt.Errorf("mime: invalid hex byte %#02x", b) 310 } 311 312 var bufPool = sync.Pool{ 313 New: func() interface{} { 314 return new(bytes.Buffer) 315 }, 316 } 317 318 func getBuffer() *bytes.Buffer { 319 return bufPool.Get().(*bytes.Buffer) 320 } 321 322 func putBuffer(buf *bytes.Buffer) { 323 if buf.Len() > 1024 { 324 return 325 } 326 buf.Reset() 327 bufPool.Put(buf) 328 }