// Listing of github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/encoding/base64/base64.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package base64 implements base64 encoding as specified by RFC 4648.
package base64

import (
	"io"
	"strconv"
)

/*
 * Encodings
 */

// An Encoding is a radix 64 encoding/decoding scheme, defined by a
// 64-character alphabet. The most common encoding is the "base64"
// encoding defined in RFC 4648 and used in MIME (RFC 2045) and PEM
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
// the standard encoding with - and _ substituted for + and /.
type Encoding struct {
	encode    [64]byte  // alphabet: encode[v] is the output byte for the 6-bit value v
	decodeMap [256]byte // inverse of encode: input byte -> 6-bit value, 0xFF for bytes outside the alphabet
	padChar   rune      // padding character, or NoPadding when output is not padded
	strict    bool      // when set, decoding rejects non-zero trailing padding bits
}

const (
	StdPadding rune = '=' // Standard padding character
	NoPadding  rune = -1  // No padding
)

// encodeStd is the standard base64 alphabet ('+' and '/' for values 62 and 63).
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

// encodeURL is the URL- and filename-safe alphabet ('-' and '_' for values 62 and 63).
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"

// NewEncoding returns a new padded Encoding defined by the given alphabet,
// which must be a 64-byte string that does not contain the padding character
// or CR / LF ('\r', '\n').
// The resulting Encoding uses the default padding character ('='),
// which may be changed or disabled via WithPadding.
42 func NewEncoding(encoder string) *Encoding { 43 if len(encoder) != 64 { 44 panic("encoding alphabet is not 64-bytes long") 45 } 46 for i := 0; i < len(encoder); i++ { 47 if encoder[i] == '\n' || encoder[i] == '\r' { 48 panic("encoding alphabet contains newline character") 49 } 50 } 51 52 e := new(Encoding) 53 e.padChar = StdPadding 54 copy(e.encode[:], encoder) 55 56 for i := 0; i < len(e.decodeMap); i++ { 57 e.decodeMap[i] = 0xFF 58 } 59 for i := 0; i < len(encoder); i++ { 60 e.decodeMap[encoder[i]] = byte(i) 61 } 62 return e 63 } 64 65 // WithPadding creates a new encoding identical to enc except 66 // with a specified padding character, or NoPadding to disable padding. 67 // The padding character must not be '\r' or '\n', must not 68 // be contained in the encoding's alphabet and must be a rune equal or 69 // below '\xff'. 70 func (enc Encoding) WithPadding(padding rune) *Encoding { 71 if padding == '\r' || padding == '\n' || padding > 0xff { 72 panic("invalid padding") 73 } 74 75 for i := 0; i < len(enc.encode); i++ { 76 if rune(enc.encode[i]) == padding { 77 panic("padding contained in alphabet") 78 } 79 } 80 81 enc.padChar = padding 82 return &enc 83 } 84 85 // Strict creates a new encoding identical to enc except with 86 // strict decoding enabled. In this mode, the decoder requires that 87 // trailing padding bits are zero, as described in RFC 4648 section 3.5. 88 func (enc Encoding) Strict() *Encoding { 89 enc.strict = true 90 return &enc 91 } 92 93 // StdEncoding is the standard base64 encoding, as defined in 94 // RFC 4648. 95 var StdEncoding = NewEncoding(encodeStd) 96 97 // URLEncoding is the alternate base64 encoding defined in RFC 4648. 98 // It is typically used in URLs and file names. 99 var URLEncoding = NewEncoding(encodeURL) 100 101 // RawStdEncoding is the standard raw, unpadded base64 encoding, 102 // as defined in RFC 4648 section 3.2. 103 // This is the same as StdEncoding but omits padding characters. 
104 var RawStdEncoding = StdEncoding.WithPadding(NoPadding) 105 106 // RawURLEncoding is the unpadded alternate base64 encoding defined in RFC 4648. 107 // It is typically used in URLs and file names. 108 // This is the same as URLEncoding but omits padding characters. 109 var RawURLEncoding = URLEncoding.WithPadding(NoPadding) 110 111 /* 112 * Encoder 113 */ 114 115 // Encode encodes src using the encoding enc, writing 116 // EncodedLen(len(src)) bytes to dst. 117 // 118 // The encoding pads the output to a multiple of 4 bytes, 119 // so Encode is not appropriate for use on individual blocks 120 // of a large data stream. Use NewEncoder() instead. 121 func (enc *Encoding) Encode(dst, src []byte) { 122 if len(src) == 0 { 123 return 124 } 125 126 di, si := 0, 0 127 n := (len(src) / 3) * 3 128 for si < n { 129 // Convert 3x 8bit source bytes into 4 bytes 130 val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2]) 131 132 dst[di+0] = enc.encode[val>>18&0x3F] 133 dst[di+1] = enc.encode[val>>12&0x3F] 134 dst[di+2] = enc.encode[val>>6&0x3F] 135 dst[di+3] = enc.encode[val&0x3F] 136 137 si += 3 138 di += 4 139 } 140 141 remain := len(src) - si 142 if remain == 0 { 143 return 144 } 145 // Add the remaining small block 146 val := uint(src[si+0]) << 16 147 if remain == 2 { 148 val |= uint(src[si+1]) << 8 149 } 150 151 dst[di+0] = enc.encode[val>>18&0x3F] 152 dst[di+1] = enc.encode[val>>12&0x3F] 153 154 switch remain { 155 case 2: 156 dst[di+2] = enc.encode[val>>6&0x3F] 157 if enc.padChar != NoPadding { 158 dst[di+3] = byte(enc.padChar) 159 } 160 case 1: 161 if enc.padChar != NoPadding { 162 dst[di+2] = byte(enc.padChar) 163 dst[di+3] = byte(enc.padChar) 164 } 165 } 166 } 167 168 // EncodeToString returns the base64 encoding of src. 
169 func (enc *Encoding) EncodeToString(src []byte) string { 170 buf := make([]byte, enc.EncodedLen(len(src))) 171 enc.Encode(buf, src) 172 return string(buf) 173 } 174 175 type encoder struct { 176 err error 177 enc *Encoding 178 w io.Writer 179 buf [3]byte // buffered data waiting to be encoded 180 nbuf int // number of bytes in buf 181 out [1024]byte // output buffer 182 } 183 184 func (e *encoder) Write(p []byte) (n int, err error) { 185 if e.err != nil { 186 return 0, e.err 187 } 188 189 // Leading fringe. 190 if e.nbuf > 0 { 191 var i int 192 for i = 0; i < len(p) && e.nbuf < 3; i++ { 193 e.buf[e.nbuf] = p[i] 194 e.nbuf++ 195 } 196 n += i 197 p = p[i:] 198 if e.nbuf < 3 { 199 return 200 } 201 e.enc.Encode(e.out[:], e.buf[:]) 202 if _, e.err = e.w.Write(e.out[:4]); e.err != nil { 203 return n, e.err 204 } 205 e.nbuf = 0 206 } 207 208 // Large interior chunks. 209 for len(p) >= 3 { 210 nn := len(e.out) / 4 * 3 211 if nn > len(p) { 212 nn = len(p) 213 nn -= nn % 3 214 } 215 e.enc.Encode(e.out[:], p[:nn]) 216 if _, e.err = e.w.Write(e.out[0 : nn/3*4]); e.err != nil { 217 return n, e.err 218 } 219 n += nn 220 p = p[nn:] 221 } 222 223 // Trailing fringe. 224 for i := 0; i < len(p); i++ { 225 e.buf[i] = p[i] 226 } 227 e.nbuf = len(p) 228 n += len(p) 229 return 230 } 231 232 // Close flushes any pending output from the encoder. 233 // It is an error to call Write after calling Close. 234 func (e *encoder) Close() error { 235 // If there's anything left in the buffer, flush it out 236 if e.err == nil && e.nbuf > 0 { 237 e.enc.Encode(e.out[:], e.buf[:e.nbuf]) 238 _, e.err = e.w.Write(e.out[:e.enc.EncodedLen(e.nbuf)]) 239 e.nbuf = 0 240 } 241 return e.err 242 } 243 244 // NewEncoder returns a new base64 stream encoder. Data written to 245 // the returned writer will be encoded using enc and then written to w. 
246 // Base64 encodings operate in 4-byte blocks; when finished 247 // writing, the caller must Close the returned encoder to flush any 248 // partially written blocks. 249 func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser { 250 return &encoder{enc: enc, w: w} 251 } 252 253 // EncodedLen returns the length in bytes of the base64 encoding 254 // of an input buffer of length n. 255 func (enc *Encoding) EncodedLen(n int) int { 256 if enc.padChar == NoPadding { 257 return (n*8 + 5) / 6 // minimum # chars at 6 bits per char 258 } 259 return (n + 2) / 3 * 4 // minimum # 4-char quanta, 3 bytes each 260 } 261 262 /* 263 * Decoder 264 */ 265 266 type CorruptInputError int64 267 268 func (e CorruptInputError) Error() string { 269 return "illegal base64 data at input byte " + strconv.FormatInt(int64(e), 10) 270 } 271 272 // decode is like Decode but returns an additional 'end' value, which 273 // indicates if end-of-message padding or a partial quantum was encountered 274 // and thus any additional data is an error. 275 func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { 276 var inIdx int 277 si := 0 278 279 // skip over newlines 280 for si < len(src) && (src[si] == '\n' || src[si] == '\r') { 281 si++ 282 } 283 284 for si < len(src) && !end { 285 // Decode quantum using the base64 alphabet 286 var dbuf [4]byte 287 dinc, dlen := 3, 4 288 289 for j := range dbuf { 290 if len(src) == si { 291 if enc.padChar != NoPadding || j < 2 { 292 return n, false, CorruptInputError(si - j) 293 } 294 dinc, dlen, end = j-1, j, true 295 break 296 } 297 in := src[si] 298 inIdx = si 299 300 si++ 301 // skip over newlines 302 for si < len(src) && (src[si] == '\n' || src[si] == '\r') { 303 si++ 304 } 305 306 if rune(in) == enc.padChar { 307 // We've reached the end and there's padding 308 switch j { 309 case 0, 1: 310 // incorrect padding 311 return n, false, CorruptInputError(inIdx) 312 case 2: 313 // "==" is expected, the first "=" is already consumed. 
314 if si == len(src) { 315 // not enough padding 316 return n, false, CorruptInputError(len(src)) 317 } 318 if rune(src[si]) != enc.padChar { 319 // incorrect padding 320 return n, false, CorruptInputError(si - 1) 321 } 322 323 si++ 324 // skip over newlines 325 for si < len(src) && (src[si] == '\n' || src[si] == '\r') { 326 si++ 327 } 328 } 329 if si < len(src) { 330 // trailing garbage 331 err = CorruptInputError(si) 332 } 333 dinc, dlen, end = 3, j, true 334 break 335 } 336 dbuf[j] = enc.decodeMap[in] 337 if dbuf[j] == 0xFF { 338 return n, false, CorruptInputError(inIdx) 339 } 340 } 341 342 // Convert 4x 6bit source bytes into 3 bytes 343 val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3]) 344 dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16) 345 switch dlen { 346 case 4: 347 dst[2] = dbuf[2] 348 dbuf[2] = 0 349 fallthrough 350 case 3: 351 dst[1] = dbuf[1] 352 if enc.strict && dbuf[2] != 0 { 353 return n, end, CorruptInputError(si - 1) 354 } 355 dbuf[1] = 0 356 fallthrough 357 case 2: 358 dst[0] = dbuf[0] 359 if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) { 360 return n, end, CorruptInputError(si - 2) 361 } 362 } 363 dst = dst[dinc:] 364 n += dlen - 1 365 } 366 367 return n, end, err 368 } 369 370 // Decode decodes src using the encoding enc. It writes at most 371 // DecodedLen(len(src)) bytes to dst and returns the number of bytes 372 // written. If src contains invalid base64 data, it will return the 373 // number of bytes successfully written and CorruptInputError. 374 // New line characters (\r and \n) are ignored. 375 func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { 376 n, _, err = enc.decode(dst, src) 377 return 378 } 379 380 // DecodeString returns the bytes represented by the base64 string s. 
381 func (enc *Encoding) DecodeString(s string) ([]byte, error) { 382 dbuf := make([]byte, enc.DecodedLen(len(s))) 383 n, _, err := enc.decode(dbuf, []byte(s)) 384 return dbuf[:n], err 385 } 386 387 type decoder struct { 388 err error 389 readErr error // error from r.Read 390 enc *Encoding 391 r io.Reader 392 end bool // saw end of message 393 buf [1024]byte // leftover input 394 nbuf int 395 out []byte // leftover decoded output 396 outbuf [1024 / 4 * 3]byte 397 } 398 399 func (d *decoder) Read(p []byte) (n int, err error) { 400 // Use leftover decoded output from last read. 401 if len(d.out) > 0 { 402 n = copy(p, d.out) 403 d.out = d.out[n:] 404 return n, nil 405 } 406 407 if d.err != nil { 408 return 0, d.err 409 } 410 411 // This code assumes that d.r strips supported whitespace ('\r' and '\n'). 412 413 // Refill buffer. 414 for d.nbuf < 4 && d.readErr == nil { 415 nn := len(p) / 3 * 4 416 if nn < 4 { 417 nn = 4 418 } 419 if nn > len(d.buf) { 420 nn = len(d.buf) 421 } 422 nn, d.readErr = d.r.Read(d.buf[d.nbuf:nn]) 423 d.nbuf += nn 424 } 425 426 if d.nbuf < 4 { 427 if d.enc.padChar == NoPadding && d.nbuf > 0 { 428 // Decode final fragment, without padding. 429 var nw int 430 nw, _, d.err = d.enc.decode(d.outbuf[:], d.buf[:d.nbuf]) 431 d.nbuf = 0 432 d.end = true 433 d.out = d.outbuf[:nw] 434 n = copy(p, d.out) 435 d.out = d.out[n:] 436 if n > 0 || len(p) == 0 && len(d.out) > 0 { 437 return n, nil 438 } 439 if d.err != nil { 440 return 0, d.err 441 } 442 } 443 d.err = d.readErr 444 if d.err == io.EOF && d.nbuf > 0 { 445 d.err = io.ErrUnexpectedEOF 446 } 447 return 0, d.err 448 } 449 450 // Decode chunk into p, or d.out and then p if p is too small. 
451 nr := d.nbuf / 4 * 4 452 nw := d.nbuf / 4 * 3 453 if nw > len(p) { 454 nw, d.end, d.err = d.enc.decode(d.outbuf[:], d.buf[:nr]) 455 d.out = d.outbuf[:nw] 456 n = copy(p, d.out) 457 d.out = d.out[n:] 458 } else { 459 n, d.end, d.err = d.enc.decode(p, d.buf[:nr]) 460 } 461 d.nbuf -= nr 462 copy(d.buf[:d.nbuf], d.buf[nr:]) 463 return n, d.err 464 } 465 466 type newlineFilteringReader struct { 467 wrapped io.Reader 468 } 469 470 func (r *newlineFilteringReader) Read(p []byte) (int, error) { 471 n, err := r.wrapped.Read(p) 472 for n > 0 { 473 offset := 0 474 for i, b := range p[:n] { 475 if b != '\r' && b != '\n' { 476 if i != offset { 477 p[offset] = b 478 } 479 offset++ 480 } 481 } 482 if offset > 0 { 483 return offset, err 484 } 485 // Previous buffer entirely whitespace, read again 486 n, err = r.wrapped.Read(p) 487 } 488 return n, err 489 } 490 491 // NewDecoder constructs a new base64 stream decoder. 492 func NewDecoder(enc *Encoding, r io.Reader) io.Reader { 493 return &decoder{enc: enc, r: &newlineFilteringReader{r}} 494 } 495 496 // DecodedLen returns the maximum length in bytes of the decoded data 497 // corresponding to n bytes of base64-encoded data. 498 func (enc *Encoding) DecodedLen(n int) int { 499 if enc.padChar == NoPadding { 500 // Unpadded data may end with partial block of 2-3 characters. 501 return n * 6 / 8 502 } 503 // Padded base64 should always be a multiple of 4 characters in length. 504 return n / 4 * 3 505 }