github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/gnovm/stdlibs/encoding/base64/base64.gno (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package base64 implements base64 encoding as specified by RFC 4648.
     6  package base64
     7  
     8  import (
     9  	"encoding/binary"
    10  	"io"
    11  	"strconv"
    12  )
    13  
    14  /*
    15   * Encodings
    16   */
    17  
// An Encoding is a radix 64 encoding/decoding scheme, defined by a
// 64-character alphabet. The most common encoding is the "base64"
// encoding defined in RFC 4648 and used in MIME (RFC 2045) and PEM
// (RFC 1421).  RFC 4648 also defines an alternate encoding, which is
// the standard encoding with - and _ substituted for + and /.
type Encoding struct {
	encode    [64]byte  // alphabet: encode[v] is the character emitted for the 6-bit value v
	decodeMap [256]byte // inverse of encode, indexed by input byte; 0xFF marks a byte outside the alphabet
	padChar   rune      // padding character, or NoPadding when padding is disabled
	strict    bool      // when set, decoding rejects non-zero trailing padding bits
}
    29  
// Padding values accepted by Encoding.WithPadding.
const (
	StdPadding rune = '=' // Standard padding character
	NoPadding  rune = -1  // No padding
)
    34  
// Alphabets used to build the predefined encodings. The character at
// index i represents the 6-bit value i. The two alphabets differ only in
// their last two characters ("+/" versus "-_").
const (
	encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
	encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
)
    39  
    40  // NewEncoding returns a new padded Encoding defined by the given alphabet,
    41  // which must be a 64-byte string that does not contain the padding character
    42  // or CR / LF ('\r', '\n').
    43  // The resulting Encoding uses the default padding character ('='),
    44  // which may be changed or disabled via WithPadding.
    45  func NewEncoding(encoder string) *Encoding {
    46  	if len(encoder) != 64 {
    47  		panic("encoding alphabet is not 64-bytes long")
    48  	}
    49  	for i := 0; i < len(encoder); i++ {
    50  		if encoder[i] == '\n' || encoder[i] == '\r' {
    51  			panic("encoding alphabet contains newline character")
    52  		}
    53  	}
    54  
    55  	e := new(Encoding)
    56  	e.padChar = StdPadding
    57  	copy(e.encode[:], encoder)
    58  
    59  	for i := 0; i < len(e.decodeMap); i++ {
    60  		e.decodeMap[i] = 0xFF
    61  	}
    62  	for i := 0; i < len(encoder); i++ {
    63  		e.decodeMap[encoder[i]] = byte(i)
    64  	}
    65  	return e
    66  }
    67  
    68  // WithPadding creates a new encoding identical to enc except
    69  // with a specified padding character, or NoPadding to disable padding.
    70  // The padding character must not be '\r' or '\n', must not
    71  // be contained in the encoding's alphabet and must be a rune equal or
    72  // below '\xff'.
    73  func (enc Encoding) WithPadding(padding rune) *Encoding {
    74  	if padding == '\r' || padding == '\n' || padding > 0xff {
    75  		panic("invalid padding")
    76  	}
    77  
    78  	for i := 0; i < len(enc.encode); i++ {
    79  		if rune(enc.encode[i]) == padding {
    80  			panic("padding contained in alphabet")
    81  		}
    82  	}
    83  
    84  	enc.padChar = padding
    85  	return &enc
    86  }
    87  
    88  // Strict creates a new encoding identical to enc except with
    89  // strict decoding enabled. In this mode, the decoder requires that
    90  // trailing padding bits are zero, as described in RFC 4648 section 3.5.
    91  //
    92  // Note that the input is still malleable, as new line characters
    93  // (CR and LF) are still ignored.
    94  func (enc Encoding) Strict() *Encoding {
    95  	enc.strict = true
    96  	return &enc
    97  }
    98  
// StdEncoding is the standard base64 encoding, as defined in
// RFC 4648. It is built from the "+/" alphabet and pads with '='.
var StdEncoding = NewEncoding(encodeStd)

// URLEncoding is the alternate base64 encoding defined in RFC 4648.
// It is typically used in URLs and file names.
// It is built from the "-_" alphabet and pads with '='.
var URLEncoding = NewEncoding(encodeURL)

// RawStdEncoding is the standard raw, unpadded base64 encoding,
// as defined in RFC 4648 section 3.2.
// This is the same as StdEncoding but omits padding characters.
var RawStdEncoding = StdEncoding.WithPadding(NoPadding)

// RawURLEncoding is the unpadded alternate base64 encoding defined in RFC 4648.
// It is typically used in URLs and file names.
// This is the same as URLEncoding but omits padding characters.
var RawURLEncoding = URLEncoding.WithPadding(NoPadding)
   116  
   117  /*
   118   * Encoder
   119   */
   120  
   121  // Encode encodes src using the encoding enc, writing
   122  // EncodedLen(len(src)) bytes to dst.
   123  //
   124  // The encoding pads the output to a multiple of 4 bytes,
   125  // so Encode is not appropriate for use on individual blocks
   126  // of a large data stream. Use NewEncoder() instead.
   127  func (enc *Encoding) Encode(dst, src []byte) {
   128  	if len(src) == 0 {
   129  		return
   130  	}
   131  	// enc is a pointer receiver, so the use of enc.encode within the hot
   132  	// loop below means a nil check at every operation. Lift that nil check
   133  	// outside of the loop to speed up the encoder.
   134  	_ = enc.encode
   135  
   136  	di, si := 0, 0
   137  	n := (len(src) / 3) * 3
   138  	for si < n {
   139  		// Convert 3x 8bit source bytes into 4 bytes
   140  		val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2])
   141  
   142  		dst[di+0] = enc.encode[val>>18&0x3F]
   143  		dst[di+1] = enc.encode[val>>12&0x3F]
   144  		dst[di+2] = enc.encode[val>>6&0x3F]
   145  		dst[di+3] = enc.encode[val&0x3F]
   146  
   147  		si += 3
   148  		di += 4
   149  	}
   150  
   151  	remain := len(src) - si
   152  	if remain == 0 {
   153  		return
   154  	}
   155  	// Add the remaining small block
   156  	val := uint(src[si+0]) << 16
   157  	if remain == 2 {
   158  		val |= uint(src[si+1]) << 8
   159  	}
   160  
   161  	dst[di+0] = enc.encode[val>>18&0x3F]
   162  	dst[di+1] = enc.encode[val>>12&0x3F]
   163  
   164  	switch remain {
   165  	case 2:
   166  		dst[di+2] = enc.encode[val>>6&0x3F]
   167  		if enc.padChar != NoPadding {
   168  			dst[di+3] = byte(enc.padChar)
   169  		}
   170  	case 1:
   171  		if enc.padChar != NoPadding {
   172  			dst[di+2] = byte(enc.padChar)
   173  			dst[di+3] = byte(enc.padChar)
   174  		}
   175  	}
   176  }
   177  
   178  // EncodeToString returns the base64 encoding of src.
   179  func (enc *Encoding) EncodeToString(src []byte) string {
   180  	buf := make([]byte, enc.EncodedLen(len(src)))
   181  	enc.Encode(buf, src)
   182  	return string(buf)
   183  }
   184  
// encoder is the streaming encoder returned by NewEncoder. It holds back
// input that does not yet form a complete 3-byte group until more data
// arrives or Close is called.
type encoder struct {
	err  error      // error from w.Write; once set, Write and Close return it
	enc  *Encoding  // encoding used to convert the data
	w    io.Writer  // destination for the encoded output
	buf  [3]byte    // buffered data waiting to be encoded
	nbuf int        // number of bytes in buf
	out  [1024]byte // output buffer
}
   193  
   194  func (e *encoder) Write(p []byte) (n int, err error) {
   195  	if e.err != nil {
   196  		return 0, e.err
   197  	}
   198  
   199  	// Leading fringe.
   200  	if e.nbuf > 0 {
   201  		var i int
   202  		for i = 0; i < len(p) && e.nbuf < 3; i++ {
   203  			e.buf[e.nbuf] = p[i]
   204  			e.nbuf++
   205  		}
   206  		n += i
   207  		p = p[i:]
   208  		if e.nbuf < 3 {
   209  			return
   210  		}
   211  		e.enc.Encode(e.out[:], e.buf[:])
   212  		if _, e.err = e.w.Write(e.out[:4]); e.err != nil {
   213  			return n, e.err
   214  		}
   215  		e.nbuf = 0
   216  	}
   217  
   218  	// Large interior chunks.
   219  	for len(p) >= 3 {
   220  		nn := len(e.out) / 4 * 3
   221  		if nn > len(p) {
   222  			nn = len(p)
   223  			nn -= nn % 3
   224  		}
   225  		e.enc.Encode(e.out[:], p[:nn])
   226  		if _, e.err = e.w.Write(e.out[0 : nn/3*4]); e.err != nil {
   227  			return n, e.err
   228  		}
   229  		n += nn
   230  		p = p[nn:]
   231  	}
   232  
   233  	// Trailing fringe.
   234  	for i := 0; i < len(p); i++ {
   235  		e.buf[i] = p[i]
   236  	}
   237  	e.nbuf = len(p)
   238  	n += len(p)
   239  	return
   240  }
   241  
   242  // Close flushes any pending output from the encoder.
   243  // It is an error to call Write after calling Close.
   244  func (e *encoder) Close() error {
   245  	// If there's anything left in the buffer, flush it out
   246  	if e.err == nil && e.nbuf > 0 {
   247  		e.enc.Encode(e.out[:], e.buf[:e.nbuf])
   248  		_, e.err = e.w.Write(e.out[:e.enc.EncodedLen(e.nbuf)])
   249  		e.nbuf = 0
   250  	}
   251  	return e.err
   252  }
   253  
   254  // NewEncoder returns a new base64 stream encoder. Data written to
   255  // the returned writer will be encoded using enc and then written to w.
   256  // Base64 encodings operate in 4-byte blocks; when finished
   257  // writing, the caller must Close the returned encoder to flush any
   258  // partially written blocks.
   259  func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser {
   260  	return &encoder{enc: enc, w: w}
   261  }
   262  
   263  // EncodedLen returns the length in bytes of the base64 encoding
   264  // of an input buffer of length n.
   265  func (enc *Encoding) EncodedLen(n int) int {
   266  	if enc.padChar == NoPadding {
   267  		return (n*8 + 5) / 6 // minimum # chars at 6 bits per char
   268  	}
   269  	return (n + 2) / 3 * 4 // minimum # 4-char quanta, 3 bytes each
   270  }
   271  
   272  /*
   273   * Decoder
   274   */
   275  
   276  type CorruptInputError int64
   277  
   278  func (e CorruptInputError) Error() string {
   279  	return "illegal base64 data at input byte " + strconv.FormatInt(int64(e), 10)
   280  }
   281  
// decodeQuantum decodes up to 4 base64 bytes. The received parameters are
// the destination buffer dst, the source buffer src and an index in the
// source buffer si.
// It returns the updated index into src (the position just past the last
// byte consumed), the number of bytes written to dst, and an error, if any.
//
// CR and LF bytes in src are skipped and do not count towards the 4
// characters of the quantum. Malformed input is reported as a
// CorruptInputError.
func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err error) {
	// Decode quantum using the base64 alphabet
	var dbuf [4]byte
	dlen := 4 // number of alphabet characters collected for this quantum

	// Lift the nil check outside of the loop.
	_ = enc.decodeMap

	for j := 0; j < len(dbuf); j++ {
		if len(src) == si {
			// Input ended in the middle of (or right before) a quantum.
			switch {
			case j == 0:
				// Nothing was collected: clean end of input.
				return si, 0, nil
			case j == 1, enc.padChar != NoPadding:
				// A single character cannot encode a byte, and a padded
				// encoding must not end on a short quantum.
				return si, 0, CorruptInputError(si - j)
			}
			// Unpadded encoding ending with 2 or 3 characters.
			dlen = j
			break
		}
		in := src[si]
		si++

		out := enc.decodeMap[in]
		if out != 0xff {
			dbuf[j] = out
			continue
		}

		if in == '\n' || in == '\r' {
			// Newlines are ignored; undo the loop's j++ so that the
			// same slot is filled by the next character.
			j--
			continue
		}

		if rune(in) != enc.padChar {
			return si, 0, CorruptInputError(si - 1)
		}

		// We've reached the end and there's padding
		switch j {
		case 0, 1:
			// incorrect padding
			return si, 0, CorruptInputError(si - 1)
		case 2:
			// "==" is expected, the first "=" is already consumed.
			// skip over newlines
			for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
				si++
			}
			if si == len(src) {
				// not enough padding
				return si, 0, CorruptInputError(len(src))
			}
			if rune(src[si]) != enc.padChar {
				// incorrect padding
				return si, 0, CorruptInputError(si - 1)
			}

			si++
		}

		// skip over newlines
		for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
			si++
		}
		if si < len(src) {
			// trailing garbage
			// The error is recorded rather than returned immediately, so
			// the bytes decoded from this quantum are still written below.
			err = CorruptInputError(si)
		}
		dlen = j
		break
	}

	// Convert 4x 6bit source bytes into 3 bytes
	val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
	dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16)
	// Each case below spells out the statements of the cases that follow
	// it, because fallthrough is not available here (see the XXX notes).
	// In strict mode a non-zero byte that is not written to dst means the
	// trailing padding bits were not zero.
	switch dlen {
	case 4:
		dst[2] = dbuf[2]
		dbuf[2] = 0
		// XXX fallthrough not yet implemented
		// fallthrough
		dst[1] = dbuf[1]
		// dbuf[2] was zeroed just above, so this check cannot fire in this
		// case; it is part of the inlined "case 3" body.
		if enc.strict && dbuf[2] != 0 {
			return si, 0, CorruptInputError(si - 1)
		}
		dbuf[1] = 0
		dst[0] = dbuf[0]
		// Likewise cannot fire here: both bytes were zeroed above.
		if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
			return si, 0, CorruptInputError(si - 2)
		}
	case 3:
		dst[1] = dbuf[1]
		if enc.strict && dbuf[2] != 0 {
			return si, 0, CorruptInputError(si - 1)
		}
		dbuf[1] = 0
		// XXX fallthrough not yet implemented
		// fallthrough
		dst[0] = dbuf[0]
		if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
			return si, 0, CorruptInputError(si - 2)
		}
	case 2:
		dst[0] = dbuf[0]
		if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
			return si, 0, CorruptInputError(si - 2)
		}
	}

	// dlen characters of 6 bits each yield dlen-1 whole bytes.
	return si, dlen - 1, err
}
   398  
   399  // DecodeString returns the bytes represented by the base64 string s.
   400  func (enc *Encoding) DecodeString(s string) ([]byte, error) {
   401  	dbuf := make([]byte, enc.DecodedLen(len(s)))
   402  	n, err := enc.Decode(dbuf, []byte(s))
   403  	return dbuf[:n], err
   404  }
   405  
// decoder is the streaming decoder returned by NewDecoder. It buffers
// encoded input read from r and any decoded output that did not fit in
// the caller's buffer on a previous Read.
type decoder struct {
	err     error      // error returned by Read once no decoded output is pending
	readErr error      // error from r.Read
	enc     *Encoding  // encoding used to interpret the input
	r       io.Reader  // source of encoded data
	buf     [1024]byte // leftover input
	nbuf    int        // number of valid bytes in buf
	out     []byte     // leftover decoded output
	outbuf  [1024 / 4 * 3]byte // backing storage for out
}
   416  
// Read decodes base64 data from the underlying reader into p and returns
// the number of decoded bytes stored in p.
//
// Decoded bytes that do not fit in p are kept in d.out and handed out by
// later calls before any new input is read. If the input ends with a
// partial quantum that cannot be decoded, Read reports
// io.ErrUnexpectedEOF.
func (d *decoder) Read(p []byte) (n int, err error) {
	// Use leftover decoded output from last read.
	if len(d.out) > 0 {
		n = copy(p, d.out)
		d.out = d.out[n:]
		return n, nil
	}

	// Pending output has been drained; now report any stored error.
	if d.err != nil {
		return 0, d.err
	}

	// This code assumes that d.r strips supported whitespace ('\r' and '\n').

	// Refill buffer.
	// Keep reading until at least one full 4-character quantum is buffered
	// or the underlying reader reports an error (including io.EOF).
	for d.nbuf < 4 && d.readErr == nil {
		// Ask for about as much input as is needed to fill p, but never
		// less than one quantum and never more than d.buf can hold.
		nn := len(p) / 3 * 4
		if nn < 4 {
			nn = 4
		}
		if nn > len(d.buf) {
			nn = len(d.buf)
		}
		nn, d.readErr = d.r.Read(d.buf[d.nbuf:nn])
		d.nbuf += nn
	}

	if d.nbuf < 4 {
		// The reader failed or ended before a full quantum was available.
		if d.enc.padChar == NoPadding && d.nbuf > 0 {
			// Decode final fragment, without padding.
			var nw int
			nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:d.nbuf])
			d.nbuf = 0
			d.out = d.outbuf[:nw]
			n = copy(p, d.out)
			d.out = d.out[n:]
			// Hand out decoded data first; a stored error is returned by
			// a later call once d.out is empty.
			if n > 0 || len(p) == 0 && len(d.out) > 0 {
				return n, nil
			}
			if d.err != nil {
				return 0, d.err
			}
		}
		d.err = d.readErr
		if d.err == io.EOF && d.nbuf > 0 {
			// Input ended in the middle of a quantum.
			d.err = io.ErrUnexpectedEOF
		}
		return 0, d.err
	}

	// Decode chunk into p, or d.out and then p if p is too small.
	nr := d.nbuf / 4 * 4 // input bytes forming whole quanta
	nw := d.nbuf / 4 * 3 // upper bound on the bytes they decode to
	if nw > len(p) {
		nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:nr])
		d.out = d.outbuf[:nw]
		n = copy(p, d.out)
		d.out = d.out[n:]
	} else {
		n, d.err = d.enc.Decode(p, d.buf[:nr])
	}
	// Move the undecoded remainder (fewer than 4 bytes) to the front of buf.
	d.nbuf -= nr
	copy(d.buf[:d.nbuf], d.buf[nr:])
	return n, d.err
}
   482  
   483  // Decode decodes src using the encoding enc. It writes at most
   484  // DecodedLen(len(src)) bytes to dst and returns the number of bytes
   485  // written. If src contains invalid base64 data, it will return the
   486  // number of bytes successfully written and CorruptInputError.
   487  // New line characters (\r and \n) are ignored.
   488  func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
   489  	if len(src) == 0 {
   490  		return 0, nil
   491  	}
   492  
   493  	// Lift the nil check outside of the loop. enc.decodeMap is directly
   494  	// used later in this function, to let the compiler know that the
   495  	// receiver can't be nil.
   496  	_ = enc.decodeMap
   497  
   498  	si := 0
   499  	// XXX: Go source checks for strconv.IntSize >= 64 as well in this loop.
   500  	// In the gnovm, int size is always guaranteed to be 64 bits.
   501  	for len(src)-si >= 8 && len(dst)-n >= 8 {
   502  		src2 := src[si : si+8]
   503  		if dn, ok := assemble64(
   504  			enc.decodeMap[src2[0]],
   505  			enc.decodeMap[src2[1]],
   506  			enc.decodeMap[src2[2]],
   507  			enc.decodeMap[src2[3]],
   508  			enc.decodeMap[src2[4]],
   509  			enc.decodeMap[src2[5]],
   510  			enc.decodeMap[src2[6]],
   511  			enc.decodeMap[src2[7]],
   512  		); ok {
   513  			binary.BigEndian.PutUint64(dst[n:], dn)
   514  			n += 6
   515  			si += 8
   516  		} else {
   517  			var ninc int
   518  			si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   519  			n += ninc
   520  			if err != nil {
   521  				return n, err
   522  			}
   523  		}
   524  	}
   525  
   526  	for len(src)-si >= 4 && len(dst)-n >= 4 {
   527  		src2 := src[si : si+4]
   528  		if dn, ok := assemble32(
   529  			enc.decodeMap[src2[0]],
   530  			enc.decodeMap[src2[1]],
   531  			enc.decodeMap[src2[2]],
   532  			enc.decodeMap[src2[3]],
   533  		); ok {
   534  			binary.BigEndian.PutUint32(dst[n:], dn)
   535  			n += 3
   536  			si += 4
   537  		} else {
   538  			var ninc int
   539  			si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   540  			n += ninc
   541  			if err != nil {
   542  				return n, err
   543  			}
   544  		}
   545  	}
   546  
   547  	for si < len(src) {
   548  		var ninc int
   549  		si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
   550  		n += ninc
   551  		if err != nil {
   552  			return n, err
   553  		}
   554  	}
   555  	return n, err
   556  }
   557  
   558  // assemble32 assembles 4 base64 digits into 3 bytes.
   559  // Each digit comes from the decode map, and will be 0xff
   560  // if it came from an invalid character.
   561  func assemble32(n1, n2, n3, n4 byte) (dn uint32, ok bool) {
   562  	// Check that all the digits are valid. If any of them was 0xff, their
   563  	// bitwise OR will be 0xff.
   564  	if n1|n2|n3|n4 == 0xff {
   565  		return 0, false
   566  	}
   567  	return uint32(n1)<<26 |
   568  			uint32(n2)<<20 |
   569  			uint32(n3)<<14 |
   570  			uint32(n4)<<8,
   571  		true
   572  }
   573  
   574  // assemble64 assembles 8 base64 digits into 6 bytes.
   575  // Each digit comes from the decode map, and will be 0xff
   576  // if it came from an invalid character.
   577  func assemble64(n1, n2, n3, n4, n5, n6, n7, n8 byte) (dn uint64, ok bool) {
   578  	// Check that all the digits are valid. If any of them was 0xff, their
   579  	// bitwise OR will be 0xff.
   580  	if n1|n2|n3|n4|n5|n6|n7|n8 == 0xff {
   581  		return 0, false
   582  	}
   583  	return uint64(n1)<<58 |
   584  			uint64(n2)<<52 |
   585  			uint64(n3)<<46 |
   586  			uint64(n4)<<40 |
   587  			uint64(n5)<<34 |
   588  			uint64(n6)<<28 |
   589  			uint64(n7)<<22 |
   590  			uint64(n8)<<16,
   591  		true
   592  }
   593  
   594  type newlineFilteringReader struct {
   595  	wrapped io.Reader
   596  }
   597  
   598  func (r *newlineFilteringReader) Read(p []byte) (int, error) {
   599  	n, err := r.wrapped.Read(p)
   600  	for n > 0 {
   601  		offset := 0
   602  		for i, b := range p[:n] {
   603  			if b != '\r' && b != '\n' {
   604  				if i != offset {
   605  					p[offset] = b
   606  				}
   607  				offset++
   608  			}
   609  		}
   610  		if offset > 0 {
   611  			return offset, err
   612  		}
   613  		// Previous buffer entirely whitespace, read again
   614  		n, err = r.wrapped.Read(p)
   615  	}
   616  	return n, err
   617  }
   618  
   619  // NewDecoder constructs a new base64 stream decoder.
   620  func NewDecoder(enc *Encoding, r io.Reader) io.Reader {
   621  	return &decoder{enc: enc, r: &newlineFilteringReader{r}}
   622  }
   623  
   624  // DecodedLen returns the maximum length in bytes of the decoded data
   625  // corresponding to n bytes of base64-encoded data.
   626  func (enc *Encoding) DecodedLen(n int) int {
   627  	if enc.padChar == NoPadding {
   628  		// Unpadded data may end with partial block of 2-3 characters.
   629  		return n * 6 / 8
   630  	}
   631  	// Padded base64 should always be a multiple of 4 characters in length.
   632  	return n / 4 * 3
   633  }