github.com/influx6/npkg@v0.8.8/nzip/nzip.go (about)

     1  package nzip
     2  
     3  import (
     4  	"encoding/binary"
     5  	"encoding/hex"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"math"
    10  	"math/bits"
    11  	"time"
    12  	"unicode"
    13  	"unicode/utf16"
    14  	"unicode/utf8"
    15  
    16  	"github.com/influx6/npkg/nerror"
    17  )
    18  
// Byte values used for the zipped form of a boolean: ZipBool appends
// off for false and on for true.
const (
	off = 0
	on  = 1
)
    23  
    24  // UnzipBool unzips giving byte slice value into a boolean
    25  func UnzipBool(val []byte) (bool, error) {
    26  	if len(val) != 1 {
    27  		return false, nerror.New("byte slice must be of length 1")
    28  	}
    29  	if val[0] == 0 {
    30  		return false, nil
    31  	}
    32  	return true, nil
    33  }
    34  
    35  // ZipBool zips giving boolean into giving byte slice, returning appended byte slice.
    36  func ZipBool(b bool, c []byte) ([]byte, error) {
    37  	if b {
    38  		return append(c, on), nil
    39  	}
    40  	return append(c, off), nil
    41  }
    42  
    43  // UnzipFloat32 converts a byte slice into an float32.
    44  func UnzipFloat32(val []byte) (float32, error) {
    45  	dl, n := DecodeVarInt32(val)
    46  	if n == 0 {
    47  		return 0, nerror.New("failed to decode float32")
    48  	}
    49  
    50  	return DecodeFloat32(dl), nil
    51  }
    52  
    53  // UnzipFloat64 converts a byte slice into an float64.
    54  func UnzipFloat64(val []byte) (float64, error) {
    55  	dl, n := DecodeVarInt64(val)
    56  	if n == 0 {
    57  		return 0, nerror.New("failed to decode float32")
    58  	}
    59  
    60  	return DecodeFloat64(dl), nil
    61  }
    62  
    63  // ZipFloat32 converts a float64 into a byte slice.
    64  func ZipFloat32(val float32, c []byte) ([]byte, error) {
    65  	enc := EncodeVarInt32(EncodeFloat32(val))
    66  	return append(c, enc...), nil
    67  }
    68  
    69  // ZipFloat64 converts a float64 into a byte slice.
    70  func ZipFloat64(val float64, c []byte) ([]byte, error) {
    71  	enc := EncodeVarInt64(EncodeFloat64(val))
    72  	return append(c, enc...), nil
    73  }
    74  
    75  // UnzipUint converts a byte slice into an int.
    76  func UnzipUint(val []byte) (uint, error) {
    77  	var dl, n = DecodeVarInt64(val)
    78  	if n == 0 {
    79  		return 0, nerror.New("failed to decode float32")
    80  	}
    81  	return uint(dl), nil
    82  }
    83  
    84  // UnzipInt converts a byte slice into an int.
    85  func UnzipInt(val []byte) (int, error) {
    86  	var dl, n = DecodeVarInt64(val)
    87  	if n == 0 {
    88  		return 0, nerror.New("failed to decode float32")
    89  	}
    90  	return int(dl), nil
    91  }
    92  
    93  // UnzipInt converts a byte slice into an uint32.
    94  func UnzipUint32(val []byte) (uint32, error) {
    95  	dl, n := DecodeVarInt32(val)
    96  	if n == 0 {
    97  		return 0, nerror.New("not found")
    98  	}
    99  	return uint32(dl), nil
   100  }
   101  
   102  // UnzipInt converts a byte slice into an int32.
   103  func UnzipInt32(val []byte) (int32, error) {
   104  	dl, n := DecodeVarInt32(val)
   105  	if n == 0 {
   106  		return 0, nerror.New("not found")
   107  	}
   108  	return int32(dl), nil
   109  }
   110  
   111  // UnzipInt converts a byte slice into an int64.
   112  func UnzipInt64(val []byte) (int64, error) {
   113  	dl, n := DecodeVarInt64(val)
   114  	if n == 0 {
   115  		return 0, nerror.New("failed to decode float32")
   116  	}
   117  	return int64(dl), nil
   118  }
   119  
   120  // UnzipUint64 converts a byte slice into an uint64.
   121  func UnzipUint64(val []byte) (uint64, error) {
   122  	dl, n := DecodeVarInt64(val)
   123  	if n == 0 {
   124  		return 0, nerror.New("failed to decode float32")
   125  	}
   126  	return uint64(dl), nil
   127  }
   128  
   129  // UnzipUint16 converts a byte slice into an uint64.
   130  func UnzipUint16(val []byte) (uint16, error) {
   131  	dl, err := DecodeUint16FromBytes(val)
   132  	if err != nil {
   133  		return 0, err
   134  	}
   135  
   136  	return dl, nil
   137  }
   138  
   139  // UnzipInt16 converts a byte slice into an uint64.
   140  func UnzipInt16(val []byte) (int16, error) {
   141  	dl, err := DecodeUint16FromBytes(val)
   142  	if err != nil {
   143  		return 0, err
   144  	}
   145  
   146  	return int16(dl), nil
   147  }
   148  
   149  // ZipInt converts provided value in the range of {int, uint} types
   150  // in all supported arch of 8,16,32,64 into a byte slice.
   151  func ZipInt(b interface{}, c []byte) ([]byte, error) {
   152  	switch val := b.(type) {
   153  	case uint:
   154  		if val < math.MaxUint32 {
   155  			return append(c, EncodeVarInt32(uint32(val))...), nil
   156  		} else {
   157  			return append(c, EncodeVarInt64(uint64(val))...), nil
   158  		}
   159  	case int:
   160  		if val < math.MaxInt32 {
   161  			return append(c, EncodeVarInt32(uint32(val))...), nil
   162  		} else {
   163  			return append(c, EncodeVarInt64(uint64(val))...), nil
   164  		}
   165  	case uint8:
   166  		return append(c, val), nil
   167  	case uint16:
   168  		return append(c, EncodeUInt16(val)...), nil
   169  	case uint32:
   170  		return append(c, EncodeVarInt32(val)...), nil
   171  	case uint64:
   172  		return append(c, EncodeVarInt64(val)...), nil
   173  	case int8:
   174  		return append(c, uint8(val)), nil
   175  	case int16:
   176  		return append(c, EncodeUInt16(uint16(val))...), nil
   177  	case int32:
   178  		return append(c, EncodeVarInt32(uint32(val))...), nil
   179  	case int64:
   180  		return append(c, EncodeVarInt64(uint64(val))...), nil
   181  	}
   182  	return nil, nerror.New("type is not a range of int/uint types")
   183  }
   184  
   185  // UnzipTime converts byte slice into a time.Time object using time.RFC3339 as format.
   186  func UnzipTime(val []byte) (time.Time, error) {
   187  	return UnzipTimeWithFormat(val, time.RFC3339)
   188  }
   189  
   190  // UnzipTimeWithFormat converts byte slice into a time.Time object using provided format string.
   191  func UnzipTimeWithFormat(val []byte, format string) (time.Time, error) {
   192  	return time.Parse(format, string(val))
   193  }
   194  
   195  // ZipTime converts giving time.Time object into a string using time.RFC3339 format.
   196  func ZipTime(b time.Time, c []byte) ([]byte, error) {
   197  	return ZipTimeWithFormat(time.RFC3339, b, c)
   198  }
   199  
   200  // ZipTimeWithFormat converts giving time.Time object into a string using a giving format.
   201  func ZipTimeWithFormat(format string, b time.Time, c []byte) ([]byte, error) {
   202  	formatted := b.Format(format)
   203  	return append(c, formatted...), nil
   204  }
   205  
   206  //******************************************
   207  // Codec Functions
   208  //******************************************
   209  
   210  // DecodeInt16FromBytes attempts to decode provided byte slice
   211  // into a int16 ensuring that it has minimum length of 2.
   212  // It uses binary.BigEndian.
   213  func DecodeInt16FromBytes(val []byte) (int16, error) {
   214  	de, err := DecodeUint16FromBytes(val)
   215  	return int16(de), err
   216  }
   217  
   218  // DecodeUint16FromBytes attempts to decode provided byte slice
   219  // into a uint16 ensuring that it has minimum length of 2.
   220  // It uses binary.BigEndian.
   221  func DecodeUint16FromBytes(val []byte) (uint16, error) {
   222  	if len(val) < 2 {
   223  		return 0, errors.New("byte slice length too small, must be 2")
   224  	}
   225  
   226  	var err error
   227  	defer func() {
   228  		if it := recover(); it != nil {
   229  			err = errors.New("failed to decode byte slice with binary.BigEndian")
   230  		}
   231  	}()
   232  	return binary.BigEndian.Uint16(val), err
   233  }
   234  
   235  // DecodeInt64FromBytes attempts to decode provided byte slice
   236  // into a int64 ensuring that it has minimum length of 8.
   237  // It uses binary.BigEndian.
   238  func DecodeInt64FromBytes(val []byte) (int64, error) {
   239  	de, err := DecodeUint64FromBytes(val)
   240  	return int64(de), err
   241  }
   242  
   243  // DecodeUint64FromBytes attempts to decode provided byte slice
   244  // into a uint64 ensuring that it has minimum length of 8.
   245  // It uses binary.BigEndian.
   246  func DecodeUint64FromBytes(val []byte) (uint64, error) {
   247  	if len(val) < 8 {
   248  		return 0, errors.New("byte slice length too small, must be 8")
   249  	}
   250  
   251  	var err error
   252  	defer func() {
   253  		if it := recover(); it != nil {
   254  			err = errors.New("failed to decode byte slice with binary.BigEndian")
   255  		}
   256  	}()
   257  	return binary.BigEndian.Uint64(val), err
   258  }
   259  
   260  // DecodeInt32FromBytes attempts to decode provided byte slice
   261  // into a int32 ensuring that it has minimum length of 4.
   262  // It uses binary.BigEndian.
   263  func DecodeInt32FromByte(val []byte) (int32, error) {
   264  	de, err := DecodeUint32FromBytes(val)
   265  	return int32(de), err
   266  }
   267  
   268  // DecodeUint32FromBytes attempts to decode provided byte slice
   269  // into a uint32 ensuring that it has minimum length of 4.
   270  // It uses binary.BigEndian.
   271  func DecodeUint32FromBytes(val []byte) (uint32, error) {
   272  	if len(val) < 4 {
   273  		return 0, errors.New("byte slice length too small, must be 4")
   274  	}
   275  
   276  	var err error
   277  	defer func() {
   278  		if it := recover(); it != nil {
   279  			err = errors.New("failed to decode byte slice with binary.BigEndian")
   280  		}
   281  	}()
   282  	return binary.BigEndian.Uint32(val), err
   283  }
   284  
   285  // EncodeInt32ToBytes encodes provided uint32 into provided
   286  // byte ensuring byte slice has minimum of length 4.
   287  // It uses binary.BigEndian.
   288  func EncodeInt32ToBytes(f int32, val []byte) error {
   289  	return EncodeUint32ToBytes(uint32(f), val)
   290  }
   291  
   292  // EncodeUint16ToBytes encodes provided uint16 into provided
   293  // byte ensuring byte slice has minimum of length 2.
   294  // It uses binary.BigEndian.
   295  func EncodeUint16ToBytes(f uint16, val []byte) error {
   296  	if cap(val) < 2 {
   297  		return errors.New("required 8 length for size")
   298  	}
   299  
   300  	binary.BigEndian.PutUint16(val, f)
   301  	return nil
   302  }
   303  
   304  // EncodeUint32ToBytes encodes provided uint32 into provided
   305  // byte ensuring byte slice has minimum of length 4.
   306  // It uses binary.BigEndian.
   307  func EncodeUint32ToBytes(f uint32, val []byte) error {
   308  	if cap(val) < 4 {
   309  		return errors.New("required 8 length for size")
   310  	}
   311  
   312  	binary.BigEndian.PutUint32(val, f)
   313  	return nil
   314  }
   315  
   316  // EncodeInt64ToBytes encodes provided uint64 into provided
   317  // byte ensuring byte slice has minimum of length 8.
   318  // It uses binary.BigEndian.
   319  func EncodeInt64ToBytes(f int64, val []byte) error {
   320  	return EncodeUint64ToBytes(uint64(f), val)
   321  }
   322  
   323  // EncodeUint64ToBytes encodes provided uint64 into provided
   324  // byte ensuring byte slice has minimum of length 8.
   325  // It uses binary.BigEndian.
   326  func EncodeUint64ToBytes(f uint64, val []byte) error {
   327  	if cap(val) < 8 {
   328  		return errors.New("required 8 length for size")
   329  	}
   330  
   331  	binary.BigEndian.PutUint64(val, f)
   332  	return nil
   333  }
   334  
   335  // DecodeFloat32 will decode provided uint64 value which should be in
   336  // standard IEEE 754 binary representation, where it bit has been reversed,
   337  // where having it's exponent appears first. It returns the float32 value.
   338  func DecodeFloat32(f uint32) float32 {
   339  	rbit := bits.ReverseBytes32(f)
   340  	return math.Float32frombits(rbit)
   341  }
   342  
   343  // EncodeFloat64 will encode provided float value into the standard
   344  // IEEE 754 binary representation and has it's bit reversed, having
   345  // the exponent appearing first.
   346  func EncodeFloat32(f float32) uint32 {
   347  	fbit := math.Float32bits(f)
   348  	return bits.ReverseBytes32(fbit)
   349  }
   350  
   351  // DecodeFloat64 will decode provided uint64 value which should be in
   352  // standard IEEE 754 binary representation, where it bit has been reversed,
   353  // where having it's exponent appears first. It returns the float64 value.
   354  func DecodeFloat64(f uint64) float64 {
   355  	rbit := bits.ReverseBytes64(f)
   356  	return math.Float64frombits(rbit)
   357  }
   358  
   359  // EncodeFloat64 will encode provided float value into the standard
   360  // IEEE 754 binary representation and has it's bit reversed, having
   361  // the exponent appearing first.
   362  func EncodeFloat64(f float64) uint64 {
   363  	fbit := math.Float64bits(f)
   364  	return bits.ReverseBytes64(fbit)
   365  }
   366  
   367  // EncodeVarInt32 encodes uint32 into a byte slice
   368  // using EncodeVarInt64 after turing uint32 into uin64.
   369  func EncodeVarInt32(x uint32) []byte {
   370  	return EncodeVarInt64(uint64(x))
   371  }
   372  
   373  // EncodeUInt16 returns the encoded byte slice of a uint16 value.
   374  func EncodeUInt16(x uint16) []byte {
   375  	data := make([]byte, 2)
   376  	binary.BigEndian.PutUint16(data, x)
   377  	return data
   378  }
   379  
   380  // EncodeVarInt64 returns the varint encoding of x.
   381  // This is the format for the
   382  // int32, int64, uint32, uint64, bool, and enum.
   383  func EncodeVarInt64(x uint64) []byte {
   384  	var buf [10]byte
   385  	var n int
   386  	for n = 0; x > 127; n++ {
   387  		buf[n] = 0x80 | uint8(x&0x7F)
   388  		x >>= 7
   389  	}
   390  	buf[n] = uint8(x)
   391  	n++
   392  	return buf[0:n]
   393  }
   394  
   395  // DecodeVarInt32 encodes uint32 into a byte slice
   396  // using EncodeVarInt64 after turing uint32 into uin64.
   397  func DecodeVarInt32(val []byte) (uint32, int) {
   398  	v, d := DecodeVarInt64(val)
   399  	return uint32(v), d
   400  }
   401  
   402  // DecodeUInt16 returns the decoded uint16 of provided byte slice which
   403  // must be of length 2.
   404  func DecodeUInt16(d []byte) uint16 {
   405  	return binary.BigEndian.Uint16(d)
   406  }
   407  
   408  // DecodeVarInt64 reads a varint-encoded integer from the slice.
   409  // It returns the integer and the number of bytes consumed, or
   410  // zero if there is not enough.
   411  // This is the format for the
   412  // int32, int64, uint32, uint64, bool.
   413  func DecodeVarInt64(buf []byte) (x uint64, n int) {
   414  	for shift := uint(0); shift < 64; shift += 7 {
   415  		if n >= len(buf) {
   416  			return 0, 0
   417  		}
   418  		b := uint64(buf[n])
   419  		n++
   420  		x |= (b & 0x7F) << shift
   421  		if (b & 0x80) == 0 {
   422  			return x, n
   423  		}
   424  	}
   425  
   426  	// The number is too large to represent in a 64-bit value.
   427  	return 0, 0
   428  }
   429  
   430  //*******************************************************************
   431  // PageName Zipping : Code taken from https://github.com/linkedin/goavro
   432  //*******************************************************************
   433  
// While slices in Go are never constants, we can initialize them once and
// reuse them many times. These slices are created at package load time.
// Each one holds the two-byte escape sequence (a backslash followed by one
// character) returned by escapeSpecialJSON for the matching special byte;
// sliceUnicode is the `\u` prefix written by appendUnicodeHex.
var (
	sliceQuote          = []byte("\\\"")
	sliceBackslash      = []byte("\\\\")
	sliceSlash          = []byte("\\/")
	sliceBackspace      = []byte("\\b")
	sliceFormfeed       = []byte("\\f")
	sliceNewline        = []byte("\\n")
	sliceCarriageReturn = []byte("\\r")
	sliceTab            = []byte("\\t")
	sliceUnicode        = []byte("\\u")
)
   448  
// ZipText decodes a double-quoted, backslash-escaped textual value found
// at the start of buf into raw bytes.
//
// buf must hold at least two bytes and begin with a double quote. Decoding
// stops at the first unescaped double quote. On success the decoded bytes
// are returned (as a []byte stored in the interface{} result) together
// with the remainder of buf that follows the closing quote.
//
// Escapes recognised by unescapeSpecialJSON are replaced by the byte they
// denote. A \uXXXX escape contributes a single byte parsed from its last
// two hexadecimal digits; the first two digits are skipped without being
// inspected. Any other escaped byte is copied through as-is.
//
// An error is returned when buf is too short, does not start with a
// double quote, contains a truncated or non-hexadecimal \u escape, or ends
// before a closing quote is found.
func ZipText(buf []byte) (interface{}, []byte, error) {
	buflen := len(buf)
	if buflen < 2 {
		return nil, nil, fmt.Errorf("cannot decode textual bytes: %s", io.ErrShortBuffer)
	}
	if buf[0] != '"' {
		return nil, nil, fmt.Errorf("cannot decode textual bytes: expected initial \"; found: %#U", buf[0])
	}
	var newBytes []byte
	var escaped bool
	// Loop through bytes following initial double quote, but note we will
	// return immediately when find unescaped double quote.
	for i := 1; i < buflen; i++ {
		b := buf[i]
		if escaped {
			// Previous byte was a backslash: interpret b as an escape code.
			escaped = false
			if b2, ok := unescapeSpecialJSON(b); ok {
				newBytes = append(newBytes, b2)
				continue
			}
			if b == 'u' {
				// NOTE: Need at least 4 more bytes to read uint16, but subtract
				// 1 because do not want to count the trailing quote and
				// subtract another 1 because already consumed u but have yet to
				// increment i.
				if i > buflen-6 {
					return nil, nil, fmt.Errorf("cannot decode textual bytes: %s", io.ErrShortBuffer)
				}
				// NOTE: Avro bytes represent binary data, and do not
				// necessarily represent text. Therefore, Avro bytes are not
				// encoded in UTF-16. Each \u is followed by 4 hexadecimal
				// digits, the first and second of which must be 0.
				// Only the third and fourth digits are parsed here.
				v, err := parseUint64FromHexSlice(buf[i+3 : i+5])
				if err != nil {
					return nil, nil, fmt.Errorf("cannot decode textual bytes: %s", err)
				}
				i += 4 // absorb 4 characters: one 'u' and three of the digits
				newBytes = append(newBytes, byte(v))
				continue
			}
			// Unknown escape: keep the escaped byte verbatim.
			newBytes = append(newBytes, b)
			continue
		}
		if b == '\\' {
			escaped = true
			continue
		}
		if b == '"' {
			// Closing quote: hand back the decoded bytes and the unread tail.
			return newBytes, buf[i+1:], nil
		}
		newBytes = append(newBytes, b)
	}
	return nil, nil, fmt.Errorf("cannot decode textual bytes: expected final \"; found: %#U", buf[buflen-1])
}
   503  
// UnzipText decodes a double-quoted, backslash-escaped textual value found
// at the start of buf into a string.
//
// buf must hold at least two bytes and begin with a double quote. Decoding
// stops at the first unescaped double quote. On success the decoded text
// is returned (as a string stored in the interface{} result) together
// with the remainder of buf that follows the closing quote.
//
// Escapes recognised by unescapeSpecialJSON are replaced by the byte they
// denote. A \uXXXX escape is parsed as a 4-digit hexadecimal code unit and
// appended as its UTF-8 encoding; when that unit is a UTF-16 surrogate, a
// second \uXXXX escape must follow immediately and the two are combined
// with utf16.DecodeRune. Any other escaped byte is copied through as-is.
//
// An error is returned when buf is too short, does not start with a
// double quote, contains a truncated or non-hexadecimal \u escape, lacks
// the second half of a surrogate pair, or ends before a closing quote is
// found.
func UnzipText(buf []byte) (interface{}, []byte, error) {
	buflen := len(buf)
	if buflen < 2 {
		return nil, nil, fmt.Errorf("cannot decode textual string: %s", io.ErrShortBuffer)
	}
	if buf[0] != '"' {
		return nil, nil, fmt.Errorf("cannot decode textual string: expected initial \"; found: %#U", buf[0])
	}
	var newBytes []byte
	var escaped bool
	// Loop through bytes following initial double quote, but note we will
	// return immediately when find unescaped double quote.
	for i := 1; i < buflen; i++ {
		b := buf[i]
		if escaped {
			// Previous byte was a backslash: interpret b as an escape code.
			escaped = false
			if b2, ok := unescapeSpecialJSON(b); ok {
				newBytes = append(newBytes, b2)
				continue
			}
			if b == 'u' {
				// NOTE: Need at least 4 more bytes to read uint16, but subtract
				// 1 because do not want to count the trailing quote and
				// subtract another 1 because already consumed u but have yet to
				// increment i.
				if i > buflen-6 {
					return nil, nil, fmt.Errorf("cannot decode textual string: %s", io.ErrShortBuffer)
				}
				v, err := parseUint64FromHexSlice(buf[i+1 : i+5])
				if err != nil {
					return nil, nil, fmt.Errorf("cannot decode textual string: %s", err)
				}
				i += 4 // absorb 4 characters: one 'u' and three of the digits

				nbl := len(newBytes)
				newBytes = append(newBytes, []byte{0, 0, 0, 0}...) // grow to make room for UTF-8 encoded rune

				r := rune(v)
				if utf16.IsSurrogate(r) {
					i++ // absorb final hexadecimal digit from previous value

					// Expect second half of surrogate pair
					if i > buflen-6 || buf[i] != '\\' || buf[i+1] != 'u' {
						return nil, nil, errors.New("cannot decode textual string: missing second half of surrogate pair")
					}

					v, err = parseUint64FromHexSlice(buf[i+2 : i+6])
					if err != nil {
						return nil, nil, fmt.Errorf("cannot decode textual string: %s", err)
					}
					i += 5 // absorb 5 characters: two for '\u', and 3 of the 4 digits

					// Get code point by combining high and low surrogate bits
					r = utf16.DecodeRune(r, rune(v))
				}

				width := utf8.EncodeRune(newBytes[nbl:], r) // append UTF-8 encoded version of code point
				newBytes = newBytes[:nbl+width]             // trim off excess bytes
				continue
			}
			// Unknown escape: keep the escaped byte verbatim.
			newBytes = append(newBytes, b)
			continue
		}
		if b == '\\' {
			escaped = true
			continue
		}
		if b == '"' {
			// Closing quote: hand back the decoded text and the unread tail.
			return string(newBytes), buf[i+1:], nil
		}
		newBytes = append(newBytes, b)
	}
	if escaped {
		// Input ended right after a backslash.
		return nil, nil, fmt.Errorf("cannot decode textual string: %s", io.ErrShortBuffer)
	}
	return nil, nil, fmt.Errorf("cannot decode textual string: expected final \"; found: %x", buf[buflen-1])
}
   581  
   582  func QuoteBytes(buf []byte, datum []byte) ([]byte, error) {
   583  	buf = append(buf, '"') // prefix buffer with double quote
   584  	for _, b := range datum {
   585  		if escaped, ok := escapeSpecialJSON(b); ok {
   586  			buf = append(buf, escaped...)
   587  			continue
   588  		}
   589  		if r := rune(b); r < utf8.RuneSelf && unicode.IsPrint(r) {
   590  			buf = append(buf, b)
   591  			continue
   592  		}
   593  		// This Code Point _could_ be encoded as a single byte, however, it's
   594  		// above standard ASCII range (b > 127), therefore must encode using its
   595  		// four-byte hexadecimal equivalent, which will always start with the
   596  		// high byte 00
   597  		buf = appendUnicodeHex(buf, uint16(b))
   598  	}
   599  	return append(buf, '"'), nil // postfix buffer with double quote
   600  }
   601  
   602  func QuoteString(buf []byte, datum string) ([]byte, error) {
   603  	buf = append(buf, '"') // prefix buffer with double quote
   604  	for _, r := range datum {
   605  		if escaped, ok := escapeSpecialJSON(byte(r)); ok {
   606  			buf = append(buf, escaped...)
   607  			continue
   608  		}
   609  		if r < utf8.RuneSelf && unicode.IsPrint(r) {
   610  			buf = append(buf, byte(r))
   611  			continue
   612  		}
   613  		// NOTE: Attempt to encode code point as UTF-16 surrogate pair
   614  		r1, r2 := utf16.EncodeRune(r)
   615  		if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
   616  			// code point does require surrogate pair, and thus two uint16 values
   617  			buf = appendUnicodeHex(buf, uint16(r1))
   618  			buf = appendUnicodeHex(buf, uint16(r2))
   619  			continue
   620  		}
   621  		// Code Point does not require surrogate pair.
   622  		buf = appendUnicodeHex(buf, uint16(r))
   623  	}
   624  	return append(buf, '"'), nil // postfix buffer with double quote
   625  }
   626  
   627  func UnescapeUnicodeString(some string) (string, error) {
   628  	if some == "" {
   629  		return "", nil
   630  	}
   631  	buf := []byte(some)
   632  	buflen := len(buf)
   633  	var i int
   634  	var newBytes []byte
   635  	var escaped bool
   636  	// Loop through bytes following initial double quote, but note we will
   637  	// return immediately when find unescaped double quote.
   638  	for i = 0; i < buflen; i++ {
   639  		b := buf[i]
   640  		if escaped {
   641  			escaped = false
   642  			if b == 'u' {
   643  				// NOTE: Need at least 4 more bytes to read uint16, but subtract
   644  				// 1 because do not want to count the trailing quote and
   645  				// subtract another 1 because already consumed u but have yet to
   646  				// increment i.
   647  				if i > buflen-6 {
   648  					return "", fmt.Errorf("cannot replace escaped characters with UTF-8 equivalent: %s", io.ErrShortBuffer)
   649  				}
   650  				v, err := parseUint64FromHexSlice(buf[i+1 : i+5])
   651  				if err != nil {
   652  					return "", fmt.Errorf("cannot replace escaped characters with UTF-8 equivalent: %s", err)
   653  				}
   654  				i += 4 // absorb 4 characters: one 'u' and three of the digits
   655  
   656  				nbl := len(newBytes)
   657  				newBytes = append(newBytes, []byte{0, 0, 0, 0}...) // grow to make room for UTF-8 encoded rune
   658  
   659  				r := rune(v)
   660  				if utf16.IsSurrogate(r) {
   661  					i++ // absorb final hexadecimal digit from previous value
   662  
   663  					// Expect second half of surrogate pair
   664  					if i > buflen-6 || buf[i] != '\\' || buf[i+1] != 'u' {
   665  						return "", errors.New("cannot replace escaped characters with UTF-8 equivalent: missing second half of surrogate pair")
   666  					}
   667  
   668  					v, err = parseUint64FromHexSlice(buf[i+2 : i+6])
   669  					if err != nil {
   670  						return "", fmt.Errorf("cannot replace escaped characters with UTF-8 equivalents: %s", err)
   671  					}
   672  					i += 5 // absorb 5 characters: two for '\u', and 3 of the 4 digits
   673  
   674  					// Get code point by combining high and low surrogate bits
   675  					r = utf16.DecodeRune(r, rune(v))
   676  				}
   677  
   678  				width := utf8.EncodeRune(newBytes[nbl:], r) // append UTF-8 encoded version of code point
   679  				newBytes = newBytes[:nbl+width]             // trim off excess bytes
   680  				continue
   681  			}
   682  			newBytes = append(newBytes, b)
   683  			continue
   684  		}
   685  		if b == '\\' {
   686  			escaped = true
   687  			continue
   688  		}
   689  		newBytes = append(newBytes, b)
   690  	}
   691  	if escaped {
   692  		return "", fmt.Errorf("cannot replace escaped characters with UTF-8 equivalents: %s", io.ErrShortBuffer)
   693  	}
   694  	return string(newBytes), nil
   695  }
   696  
   697  func parseUint64FromHexSlice(buf []byte) (uint64, error) {
   698  	var value uint64
   699  	for _, b := range buf {
   700  		diff := uint64(b - '0')
   701  		if diff < 10 {
   702  			value = (value << 4) | diff
   703  			continue
   704  		}
   705  		b10 := b + 10
   706  		diff = uint64(b10 - 'A')
   707  		if diff < 10 {
   708  			return 0, hex.InvalidByteError(b)
   709  		}
   710  		if diff < 16 {
   711  			value = (value << 4) | diff
   712  			continue
   713  		}
   714  		diff = uint64(b10 - 'a')
   715  		if diff < 10 {
   716  			return 0, hex.InvalidByteError(b)
   717  		}
   718  		if diff < 16 {
   719  			value = (value << 4) | diff
   720  			continue
   721  		}
   722  		return 0, hex.InvalidByteError(b)
   723  	}
   724  	return value, nil
   725  }
   726  
   727  func unescapeSpecialJSON(b byte) (byte, bool) {
   728  	// NOTE: The following 8 special JSON characters must be escaped:
   729  	switch b {
   730  	case '"', '\\', '/':
   731  		return b, true
   732  	case 'b':
   733  		return '\b', true
   734  	case 'f':
   735  		return '\f', true
   736  	case 'n':
   737  		return '\n', true
   738  	case 'r':
   739  		return '\r', true
   740  	case 't':
   741  		return '\t', true
   742  	}
   743  	return b, false
   744  }
   745  
   746  func appendUnicodeHex(buf []byte, v uint16) []byte {
   747  	// Start with '\u' prefix:
   748  	buf = append(buf, sliceUnicode...)
   749  	// And tack on 4 hexadecimal digits:
   750  	buf = append(buf, hexDigits[(v&0xF000)>>12])
   751  	buf = append(buf, hexDigits[(v&0xF00)>>8])
   752  	buf = append(buf, hexDigits[(v&0xF0)>>4])
   753  	buf = append(buf, hexDigits[(v&0xF)])
   754  	return buf
   755  }
   756  
// hexDigits is indexed by a 4-bit value to obtain its upper-case
// hexadecimal digit; appendUnicodeHex uses it to write \uXXXX escapes.
const hexDigits = "0123456789ABCDEF"
   758  
   759  func escapeSpecialJSON(b byte) ([]byte, bool) {
   760  	// NOTE: The following 8 special JSON characters must be escaped:
   761  	switch b {
   762  	case '"':
   763  		return sliceQuote, true
   764  	case '\\':
   765  		return sliceBackslash, true
   766  	case '/':
   767  		return sliceSlash, true
   768  	case '\b':
   769  		return sliceBackspace, true
   770  	case '\f':
   771  		return sliceFormfeed, true
   772  	case '\n':
   773  		return sliceNewline, true
   774  	case '\r':
   775  		return sliceCarriageReturn, true
   776  	case '\t':
   777  		return sliceTab, true
   778  	}
   779  	return nil, false
   780  }