github.com/ltltlt/go-source-code@v0.0.0-20190830023027-95be009773aa/encoding/base64/base64.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package base64 implements base64 encoding as specified by RFC 4648.
     6  package base64
     7  
     8  import (
     9  	"encoding/binary"
    10  	"io"
    11  	"strconv"
    12  )
    13  
    14  /*
    15   * Encodings
    16   */
    17  
// An Encoding is a radix 64 encoding/decoding scheme, defined by a
// 64-character alphabet. The most common encoding is the "base64"
// encoding defined in RFC 4648 and used in MIME (RFC 2045) and PEM
// (RFC 1421).  RFC 4648 also defines an alternate encoding, which is
// the standard encoding with - and _ substituted for + and / (intended
// for use in URLs).
type Encoding struct {
	encode    [64]byte  // alphabet: encode[v] is the output byte for the 6-bit value v
	decodeMap [256]byte // inverse of encode, indexed by input byte; 0xFF marks bytes outside the alphabet (an array, not a map, to save space)
	padChar   rune      // padding character, or NoPadding when padding is disabled
	strict    bool      // when set, decoding rejects non-zero trailing padding bits
}
    29  
// Padding settings accepted by WithPadding.
const (
	StdPadding rune = '=' // Standard padding character
	NoPadding  rune = -1  // No padding
)

// encodeStd is the alphabet used by StdEncoding; it ends in '+' and '/'.
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

// encodeURL is the alphabet used by URLEncoding; it ends in '-' and '_'.
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
    37  
    38  // NewEncoding returns a new padded Encoding defined by the given alphabet,
    39  // which must be a 64-byte string that does not contain the padding character
    40  // or CR / LF ('\r', '\n').
    41  // The resulting Encoding uses the default padding character ('='),
    42  // which may be changed or disabled via WithPadding.
    43  // 支持自定义编码转换,只要传入的encoder长度为64
    44  func NewEncoding(encoder string) *Encoding {
    45  	if len(encoder) != 64 {
    46  		panic("encoding alphabet is not 64-bytes long")
    47  	}
    48  	for i := 0; i < len(encoder); i++ {
    49  		if encoder[i] == '\n' || encoder[i] == '\r' {
    50  			panic("encoding alphabet contains newline character")
    51  		}
    52  	}
    53  
    54  	e := new(Encoding)
    55  	e.padChar = StdPadding
    56  	copy(e.encode[:], encoder) // have to copy, since encode is array
    57  
    58  	for i := 0; i < len(e.decodeMap); i++ {
    59  		e.decodeMap[i] = 0xFF
    60  	}
    61  	for i := 0; i < len(encoder); i++ {
    62  		e.decodeMap[encoder[i]] = byte(i)
    63  	}
    64  	return e
    65  }
    66  
    67  // WithPadding creates a new encoding identical to enc except
    68  // with a specified padding character, or NoPadding to disable padding.
    69  // The padding character must not be '\r' or '\n', must not
    70  // be contained in the encoding's alphabet and must be a rune equal or
    71  // below '\xff'.
    72  func (enc Encoding) WithPadding(padding rune) *Encoding {
    73  	if padding == '\r' || padding == '\n' || padding > 0xff {
    74  		panic("invalid padding")
    75  	}
    76  
    77  	for i := 0; i < len(enc.encode); i++ {
    78  		if rune(enc.encode[i]) == padding {
    79  			panic("padding contained in alphabet")
    80  		}
    81  	}
    82  
    83  	enc.padChar = padding
    84  	return &enc
    85  }
    86  
    87  // Strict creates a new encoding identical to enc except with
    88  // strict decoding enabled. In this mode, the decoder requires that
    89  // trailing padding bits are zero, as described in RFC 4648 section 3.5.
    90  // 严格模式解码要求尾部填充位为0(不可省略=?)
    91  func (enc Encoding) Strict() *Encoding {
    92  	enc.strict = true
    93  	return &enc
    94  }
    95  
// StdEncoding is the standard base64 encoding, as defined in
// RFC 4648. It pads its output with StdPadding ('=').
var StdEncoding = NewEncoding(encodeStd)

// URLEncoding is the alternate base64 encoding defined in RFC 4648.
// It is typically used in URLs and file names.
// It pads its output with StdPadding ('=').
var URLEncoding = NewEncoding(encodeURL)

// RawStdEncoding is the standard raw, unpadded base64 encoding,
// as defined in RFC 4648 section 3.2.
// This is the same as StdEncoding but omits padding characters.
var RawStdEncoding = StdEncoding.WithPadding(NoPadding)

// RawURLEncoding is the unpadded alternate base64 encoding defined in RFC 4648.
// It is typically used in URLs and file names.
// This is the same as URLEncoding but omits padding characters.
var RawURLEncoding = URLEncoding.WithPadding(NoPadding)
   113  
   114  /*
   115   * Encoder
   116   */
   117  
   118  // Encode encodes src using the encoding enc, writing
   119  // EncodedLen(len(src)) bytes to dst.
   120  //
   121  // The encoding pads the output to a multiple of 4 bytes,
   122  // so Encode is not appropriate for use on individual blocks
   123  // of a large data stream. Use NewEncoder() instead.
   124  // 不进行append处理,dst完全使用index访问,当dst长度不足时可能panic: index out of range
   125  // 这样速度更快,缺点是需要提前计算dst的长度并分配
   126  func (enc *Encoding) Encode(dst, src []byte) {
   127  	if len(src) == 0 {
   128  		return
   129  	}
   130  
   131  	di, si := 0, 0 // destinition pointer, source pointer
   132  	n := (len(src) / 3) * 3
   133  	for si < n {
   134  		// Convert 3x 8bit source bytes into 4 bytes
   135  		// 3字节整数存入四字节
   136  		val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2])
   137  
   138  		dst[di+0] = enc.encode[val>>18&0x3F]
   139  		dst[di+1] = enc.encode[val>>12&0x3F]
   140  		dst[di+2] = enc.encode[val>>6&0x3F]
   141  		dst[di+3] = enc.encode[val&0x3F]
   142  
   143  		si += 3
   144  		di += 4
   145  	}
   146  
   147  	remain := len(src) - si // 多出的字节, 1 or 2
   148  	if remain == 0 {
   149  		return
   150  	}
   151  	// Add the remaining small block
   152  	val := uint(src[si+0]) << 16
   153  	if remain == 2 {
   154  		val |= uint(src[si+1]) << 8
   155  	}
   156  
   157  	dst[di+0] = enc.encode[val>>18&0x3F]
   158  	dst[di+1] = enc.encode[val>>12&0x3F]
   159  
   160  	switch remain {
   161  	case 2:
   162  		dst[di+2] = enc.encode[val>>6&0x3F]
   163  		if enc.padChar != NoPadding {
   164  			dst[di+3] = byte(enc.padChar)
   165  		}
   166  	case 1:
   167  		if enc.padChar != NoPadding {
   168  			dst[di+2] = byte(enc.padChar)
   169  			dst[di+3] = byte(enc.padChar)
   170  		}
   171  	}
   172  }
   173  
   174  // EncodeToString returns the base64 encoding of src.
   175  // 一般直接call this, 而不是直接使用Encode
   176  func (enc *Encoding) EncodeToString(src []byte) string {
   177  	buf := make([]byte, enc.EncodedLen(len(src)))
   178  	enc.Encode(buf, src)
   179  	return string(buf)
   180  }
   181  
   182  // EncodedLen returns the length in bytes of the base64 encoding
   183  // of an input buffer of length n.
   184  func (enc *Encoding) EncodedLen(n int) int {
   185  	if enc.padChar == NoPadding {
   186  		return (n*8 + 5) / 6 // minimum # chars at 6 bits per char
   187  	}
   188  	return (n + 2) / 3 * 4 // minimum # 4-char quanta, 3 bytes each
   189  }
   190  
// encoder is a streaming base64 encoder; NewEncoder returns it as an
// io.WriteCloser.
type encoder struct {
	err  error      // first error from w.Write; once set, Write and Close return it
	enc  *Encoding  // encoding used to produce the output
	w    io.Writer  // destination for the encoded output
	buf  [3]byte    // buffered data waiting to be encoded
	nbuf int        // number of bytes in buf
	out  [1024]byte // output buffer
}
   200  
   201  // NewEncoder returns a new base64 stream encoder. Data written to
   202  // the returned writer will be encoded using enc and then written to w.
   203  // Base64 encodings operate in 4-byte blocks; when finished
   204  // writing, the caller must Close the returned encoder to flush any
   205  // partially written blocks.
   206  // 结束写后Close是必须操作,否则buf可能有残留数据未flush
   207  func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser {
   208  	return &encoder{enc: enc, w: w}
   209  }
   210  
   211  func (e *encoder) Write(p []byte) (n int, err error) {
   212  	if e.err != nil {
   213  		return 0, e.err
   214  	}
   215  
   216  	// Leading fringe.
   217  	// “起始外围”
   218  	if e.nbuf > 0 {
   219  		var i int
   220  		for i = 0; i < len(p) && e.nbuf < 3; i++ {
   221  			e.buf[e.nbuf] = p[i]
   222  			e.nbuf++
   223  		}
   224  		n += i
   225  		p = p[i:]
   226  		if e.nbuf < 3 {
   227  			return
   228  		}
   229  		e.enc.Encode(e.out[:], e.buf[:])
   230  		if _, e.err = e.w.Write(e.out[:4]); e.err != nil {
   231  			return n, e.err
   232  		}
   233  		e.nbuf = 0
   234  	}
   235  
   236  	// Large interior chunks.
   237  	// “内陆”
   238  	for len(p) >= 3 {
   239  		nn := len(e.out) / 4 * 3 // 一次能处理多少字节
   240  		if nn > len(p) {
   241  			nn = len(p)
   242  			nn -= nn % 3
   243  		}
   244  		e.enc.Encode(e.out[:], p[:nn])
   245  		if _, e.err = e.w.Write(e.out[0 : nn/3*4]); e.err != nil {
   246  			return n, e.err
   247  		}
   248  		n += nn
   249  		p = p[nn:]
   250  	}
   251  
   252  	// Trailing fringe.
   253  	// “尾部外围”
   254  	for i := 0; i < len(p); i++ {
   255  		e.buf[i] = p[i]
   256  	}
   257  	e.nbuf = len(p)
   258  	n += len(p)
   259  	return
   260  }
   261  
   262  // Close flushes any pending output from the encoder.
   263  // It is an error to call Write after calling Close.
   264  func (e *encoder) Close() error {
   265  	// If there's anything left in the buffer, flush it out
   266  	if e.err == nil && e.nbuf > 0 {
   267  		e.enc.Encode(e.out[:], e.buf[:e.nbuf])
   268  		_, e.err = e.w.Write(e.out[:e.enc.EncodedLen(e.nbuf)])
   269  		e.nbuf = 0
   270  	}
   271  	return e.err
   272  }
   273  
   274  /*
   275   * Decoder
   276   */
   277  
   278  type CorruptInputError int64
   279  
   280  func (e CorruptInputError) Error() string {
   281  	return "illegal base64 data at input byte " + strconv.FormatInt(int64(e), 10)
   282  }
   283  
   284  // decodeQuantum decodes up to 4 base64 bytes. It takes for parameters
   285  // the destination buffer dst, the source buffer src and an index in the
   286  // source buffer si.
   287  // It returns the number of bytes read from src, the number of bytes written
   288  // to dst, and an error, if any.
   289  func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err error) {
   290  	// Decode quantum using the base64 alphabet
   291  	var dbuf [4]byte
   292  	dinc, dlen := 3, 4
   293  
   294  	for j := 0; j < len(dbuf); j++ {
   295  		if len(src) == si {
   296  			switch {
   297  			case j == 0:
   298  				return si, 0, nil
   299  			case j == 1, enc.padChar != NoPadding:
   300  				return si, 0, CorruptInputError(si - j)
   301  			}
   302  			dinc, dlen = j-1, j
   303  			break
   304  		}
   305  		in := src[si]
   306  		si++
   307  
   308  		out := enc.decodeMap[in]
   309  		if out != 0xff {
   310  			dbuf[j] = out
   311  			continue
   312  		}
   313  
   314  		if in == '\n' || in == '\r' {
   315  			j--
   316  			continue
   317  		}
   318  
   319  		if rune(in) != enc.padChar {
   320  			return si, 0, CorruptInputError(si - 1)
   321  		}
   322  
   323  		// We've reached the end and there's padding
   324  		switch j {
   325  		case 0, 1:
   326  			// incorrect padding
   327  			return si, 0, CorruptInputError(si - 1)
   328  		case 2:
   329  			// "==" is expected, the first "=" is already consumed.
   330  			// skip over newlines
   331  			for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
   332  				si++
   333  			}
   334  			if si == len(src) {
   335  				// not enough padding
   336  				return si, 0, CorruptInputError(len(src))
   337  			}
   338  			if rune(src[si]) != enc.padChar {
   339  				// incorrect padding
   340  				return si, 0, CorruptInputError(si - 1)
   341  			}
   342  
   343  			si++
   344  		}
   345  
   346  		// skip over newlines
   347  		for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
   348  			si++
   349  		}
   350  		if si < len(src) {
   351  			// trailing garbage
   352  			err = CorruptInputError(si)
   353  		}
   354  		dinc, dlen = 3, j
   355  		break
   356  	}
   357  
   358  	// Convert 4x 6bit source bytes into 3 bytes
   359  	val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
   360  	dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16)
   361  	switch dlen {
   362  	case 4:
   363  		dst[2] = dbuf[2]
   364  		dbuf[2] = 0
   365  		fallthrough
   366  	case 3:
   367  		dst[1] = dbuf[1]
   368  		if enc.strict && dbuf[2] != 0 {
   369  			return si, 0, CorruptInputError(si - 1)
   370  		}
   371  		dbuf[1] = 0
   372  		fallthrough
   373  	case 2:
   374  		dst[0] = dbuf[0]
   375  		if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
   376  			return si, 0, CorruptInputError(si - 2)
   377  		}
   378  	}
   379  	dst = dst[dinc:]
   380  
   381  	return si, dlen - 1, err
   382  }
   383  
   384  // DecodeString returns the bytes represented by the base64 string s.
   385  func (enc *Encoding) DecodeString(s string) ([]byte, error) {
   386  	dbuf := make([]byte, enc.DecodedLen(len(s)))
   387  	n, err := enc.Decode(dbuf, []byte(s))
   388  	return dbuf[:n], err
   389  }
   390  
// decoder is a streaming base64 decoder; NewDecoder returns it as an
// io.Reader.
type decoder struct {
	err     error              // sticky error, returned by Read once leftover output is drained
	readErr error              // error from r.Read
	enc     *Encoding          // encoding used to decode the input
	r       io.Reader          // source of encoded input
	buf     [1024]byte         // leftover input
	nbuf    int                // number of valid bytes in buf
	out     []byte             // leftover decoded output
	outbuf  [1024 / 4 * 3]byte // scratch space that out is sliced from
}
   401  
// Read decodes input from d.r into p and returns the number of bytes
// written to p. Decoded bytes that do not fit in p are kept in d.out
// and handed out by later calls before any new input is read.
func (d *decoder) Read(p []byte) (n int, err error) {
	// Use leftover decoded output from last read.
	if len(d.out) > 0 {
		n = copy(p, d.out)
		d.out = d.out[n:]
		return n, nil
	}

	// A previously recorded error is only reported once the leftover
	// output above has been drained.
	if d.err != nil {
		return 0, d.err
	}

	// This code assumes that d.r strips supported whitespace ('\r' and '\n').

	// Refill buffer.
	// Keep reading until a whole 4-byte quantum is buffered or the
	// underlying reader reports an error.
	for d.nbuf < 4 && d.readErr == nil {
		// Ask for roughly as much input as p can hold once decoded,
		// but at least one quantum and at most the buffer size.
		nn := len(p) / 3 * 4
		if nn < 4 {
			nn = 4
		}
		if nn > len(d.buf) {
			nn = len(d.buf)
		}
		nn, d.readErr = d.r.Read(d.buf[d.nbuf:nn])
		d.nbuf += nn
	}

	// Fewer than 4 bytes buffered means the read loop stopped on an error.
	if d.nbuf < 4 {
		if d.enc.padChar == NoPadding && d.nbuf > 0 {
			// Decode final fragment, without padding.
			var nw int
			nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:d.nbuf])
			d.nbuf = 0
			d.out = d.outbuf[:nw]
			n = copy(p, d.out)
			d.out = d.out[n:]
			// Report progress first; any decode error stays in d.err
			// for the next call.
			if n > 0 || len(p) == 0 && len(d.out) > 0 {
				return n, nil
			}
			if d.err != nil {
				return 0, d.err
			}
		}
		d.err = d.readErr
		// EOF with an incomplete quantum still buffered is an
		// unexpected end of input.
		if d.err == io.EOF && d.nbuf > 0 {
			d.err = io.ErrUnexpectedEOF
		}
		return 0, d.err
	}

	// Decode chunk into p, or d.out and then p if p is too small.
	nr := d.nbuf / 4 * 4 // input bytes forming whole quanta
	nw := d.nbuf / 4 * 3 // output bytes those quanta decode to at most
	if nw > len(p) {
		nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:nr])
		d.out = d.outbuf[:nw]
		n = copy(p, d.out)
		d.out = d.out[n:]
	} else {
		n, d.err = d.enc.Decode(p, d.buf[:nr])
	}
	// Move the undecoded remainder to the front of the buffer.
	d.nbuf -= nr
	copy(d.buf[:d.nbuf], d.buf[nr:])
	return n, d.err
}
   467  
   468  // Decode decodes src using the encoding enc. It writes at most
   469  // DecodedLen(len(src)) bytes to dst and returns the number of bytes
   470  // written. If src contains invalid base64 data, it will return the
   471  // number of bytes successfully written and CorruptInputError.
   472  // New line characters (\r and \n) are ignored.
   473  func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
   474  	if len(src) == 0 {
   475  		return 0, nil
   476  	}
   477  
   478  	si := 0
   479  	ilen := len(src)
   480  	olen := len(dst)
   481  	for strconv.IntSize >= 64 && ilen-si >= 8 && olen-n >= 8 {
   482  		if ok := enc.decode64(dst[n:], src[si:]); ok {
   483  			n += 6
   484  			si += 8
   485  		} else {
   486  			var ninc int
   487  			si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   488  			n += ninc
   489  			if err != nil {
   490  				return n, err
   491  			}
   492  		}
   493  	}
   494  
   495  	for ilen-si >= 4 && olen-n >= 4 {
   496  		if ok := enc.decode32(dst[n:], src[si:]); ok {
   497  			n += 3
   498  			si += 4
   499  		} else {
   500  			var ninc int
   501  			si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   502  			n += ninc
   503  			if err != nil {
   504  				return n, err
   505  			}
   506  		}
   507  	}
   508  
   509  	for si < len(src) {
   510  		var ninc int
   511  		si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   512  		n += ninc
   513  		if err != nil {
   514  			return n, err
   515  		}
   516  	}
   517  	return n, err
   518  }
   519  
   520  // decode32 tries to decode 4 base64 char into 3 bytes.
   521  // len(dst) and len(src) must both be >= 4.
   522  // Returns true if decode succeeded.
   523  // 保证解码中间出错不影响dst, 只有src中的4个byte对应的字符均不是0xff时才写dst
   524  func (enc *Encoding) decode32(dst, src []byte) bool {
   525  	var dn, n uint32
   526  	if n = uint32(enc.decodeMap[src[0]]); n == 0xff {
   527  		return false
   528  	}
   529  	dn |= n << 26
   530  	if n = uint32(enc.decodeMap[src[1]]); n == 0xff {
   531  		return false
   532  	}
   533  	dn |= n << 20
   534  	if n = uint32(enc.decodeMap[src[2]]); n == 0xff {
   535  		return false
   536  	}
   537  	dn |= n << 14
   538  	if n = uint32(enc.decodeMap[src[3]]); n == 0xff {
   539  		return false
   540  	}
   541  	dn |= n << 8
   542  
   543  	binary.BigEndian.PutUint32(dst, dn)
   544  	return true
   545  }
   546  
   547  // decode64 tries to decode 8 base64 char into 6 bytes.
   548  // len(dst) and len(src) must both be >= 8.
   549  // Returns true if decode succeeded.
   550  func (enc *Encoding) decode64(dst, src []byte) bool {
   551  	var dn, n uint64
   552  	if n = uint64(enc.decodeMap[src[0]]); n == 0xff {
   553  		return false
   554  	}
   555  	dn |= n << 58
   556  	if n = uint64(enc.decodeMap[src[1]]); n == 0xff {
   557  		return false
   558  	}
   559  	dn |= n << 52
   560  	if n = uint64(enc.decodeMap[src[2]]); n == 0xff {
   561  		return false
   562  	}
   563  	dn |= n << 46
   564  	if n = uint64(enc.decodeMap[src[3]]); n == 0xff {
   565  		return false
   566  	}
   567  	dn |= n << 40
   568  	if n = uint64(enc.decodeMap[src[4]]); n == 0xff {
   569  		return false
   570  	}
   571  	dn |= n << 34
   572  	if n = uint64(enc.decodeMap[src[5]]); n == 0xff {
   573  		return false
   574  	}
   575  	dn |= n << 28
   576  	if n = uint64(enc.decodeMap[src[6]]); n == 0xff {
   577  		return false
   578  	}
   579  	dn |= n << 22
   580  	if n = uint64(enc.decodeMap[src[7]]); n == 0xff {
   581  		return false
   582  	}
   583  	dn |= n << 16
   584  
   585  	binary.BigEndian.PutUint64(dst, dn)
   586  	return true
   587  }
   588  
   589  type newlineFilteringReader struct {
   590  	wrapped io.Reader
   591  }
   592  
   593  func (r *newlineFilteringReader) Read(p []byte) (int, error) {
   594  	n, err := r.wrapped.Read(p)
   595  	for n > 0 {
   596  		offset := 0
   597  		for i, b := range p[:n] { // left shift, watch closely
   598  			if b != '\r' && b != '\n' {
   599  				if i != offset {
   600  					p[offset] = b
   601  				}
   602  				offset++
   603  			}
   604  		}
   605  		if offset > 0 {
   606  			return offset, err
   607  		}
   608  		// Previous buffer entirely LR or CR, read again
   609  		n, err = r.wrapped.Read(p)
   610  	}
   611  	return n, err
   612  }
   613  
   614  // NewDecoder constructs a new base64 stream decoder.
   615  // 其会自动过滤\n和\r
   616  func NewDecoder(enc *Encoding, r io.Reader) io.Reader {
   617  	return &decoder{enc: enc, r: &newlineFilteringReader{r}}
   618  }
   619  
   620  // DecodedLen returns the maximum length in bytes of the decoded data
   621  // corresponding to n bytes of base64-encoded data.
   622  func (enc *Encoding) DecodedLen(n int) int {
   623  	if enc.padChar == NoPadding {
   624  		// Unpadded data may end with partial block of 2-3 characters.
   625  		return n * 6 / 8
   626  	}
   627  	// Padded base64 should always be a multiple of 4 characters in length.
   628  	return n / 4 * 3
   629  }