github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/codec/codec.go

github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/codec/codec.go (about)

     1  // Copyright 2022 zGraph Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package codec
    16  
    17  import (
    18  	"encoding/binary"
    19  	"math"
    20  	"runtime"
    21  	"unsafe"
    22  
    23  	"github.com/pingcap/errors"
    24  )
    25  
// Constants shared by the memcomparable encoders in this file:
//
//   - encGroupSize is the number of data bytes in one encoded group.
//   - encMarker is the marker byte written after a group with no padding;
//     a padded group's marker is encMarker minus the number of pad bytes.
//   - encPad is the byte value used to fill a short group.
//   - signMask has only the most significant bit of a uint64 set; it is
//     used to flip the sign bit of integers and floats.
const (
	encGroupSize        = 8
	encMarker           = byte(0xFF)
	encPad              = byte(0x0)
	signMask     uint64 = 0x8000000000000000
)

var (
	// pads is a zero-filled slice of encGroupSize bytes; EncodeBytes slices
	// it to obtain the padding appended to a short final group.
	pads = make([]byte, encGroupSize)
)
    36  
    37  // reallocBytes is like realloc.
    38  func reallocBytes(b []byte, n int) []byte {
    39  	newSize := len(b) + n
    40  	if cap(b) < newSize {
    41  		bs := make([]byte, len(b), newSize)
    42  		copy(bs, b)
    43  		return bs
    44  	}
    45  
    46  	// slice b has capability to store n bytes
    47  	return b
    48  }
    49  
    50  // EncodeBytes guarantees the encoded value is in ascending order for comparison,
    51  // encoding with the following rule:
    52  //
    53  //	[group1][marker1]...[groupN][markerN]
    54  //	group is 8 bytes slice which is padding with 0.
    55  //	marker is `0xFF - padding 0 count`
    56  //
    57  // For example:
    58  //
    59  //	[] -> [0, 0, 0, 0, 0, 0, 0, 0, 247]
    60  //	[1, 2, 3] -> [1, 2, 3, 0, 0, 0, 0, 0, 250]
    61  //	[1, 2, 3, 0] -> [1, 2, 3, 0, 0, 0, 0, 0, 251]
    62  //	[1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247]
    63  //
    64  // Refer: https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format
    65  func EncodeBytes(b []byte, data []byte) []byte {
    66  	// Allocate more space to avoid unnecessary slice growing.
    67  	// Assume that the byte slice size is about `(len(data) / encGroupSize + 1) * (encGroupSize + 1)` bytes,
    68  	// that is `(len(data) / 8 + 1) * 9` in our implement.
    69  	dLen := len(data)
    70  	reallocSize := (dLen/encGroupSize + 1) * (encGroupSize + 1)
    71  	result := reallocBytes(b, reallocSize)
    72  	for idx := 0; idx <= dLen; idx += encGroupSize {
    73  		remain := dLen - idx
    74  		padCount := 0
    75  		if remain >= encGroupSize {
    76  			result = append(result, data[idx:idx+encGroupSize]...)
    77  		} else {
    78  			padCount = encGroupSize - remain
    79  			result = append(result, data[idx:]...)
    80  			result = append(result, pads[:padCount]...)
    81  		}
    82  
    83  		marker := encMarker - byte(padCount)
    84  		result = append(result, marker)
    85  	}
    86  
    87  	return result
    88  }
    89  
    90  // EncodeUintDesc appends the encoded value to slice b and returns the appended slice.
    91  // EncodeUintDesc guarantees that the encoded value is in descending order for comparison.
    92  func EncodeUintDesc(b []byte, v uint64) []byte {
    93  	var data [8]byte
    94  	binary.BigEndian.PutUint64(data[:], ^v)
    95  	return append(b, data[:]...)
    96  }
    97  
    98  func decodeBytes(b []byte, buf []byte, reverse bool) ([]byte, []byte, error) {
    99  	if buf == nil {
   100  		buf = make([]byte, 0, len(b))
   101  	}
   102  	buf = buf[:0]
   103  	for {
   104  		if len(b) < encGroupSize+1 {
   105  			return nil, nil, errors.New("insufficient bytes to decode value")
   106  		}
   107  
   108  		groupBytes := b[:encGroupSize+1]
   109  
   110  		group := groupBytes[:encGroupSize]
   111  		marker := groupBytes[encGroupSize]
   112  
   113  		var padCount byte
   114  		if reverse {
   115  			padCount = marker
   116  		} else {
   117  			padCount = encMarker - marker
   118  		}
   119  		if padCount > encGroupSize {
   120  			return nil, nil, errors.Errorf("invalid marker byte, group bytes %q", groupBytes)
   121  		}
   122  
   123  		realGroupSize := encGroupSize - padCount
   124  		buf = append(buf, group[:realGroupSize]...)
   125  		b = b[encGroupSize+1:]
   126  
   127  		if padCount != 0 {
   128  			var padByte = encPad
   129  			if reverse {
   130  				padByte = encMarker
   131  			}
   132  			// Check validity of padding bytes.
   133  			for _, v := range group[realGroupSize:] {
   134  				if v != padByte {
   135  					return nil, nil, errors.Errorf("invalid padding byte, group bytes %q", groupBytes)
   136  				}
   137  			}
   138  			break
   139  		}
   140  	}
   141  	if reverse {
   142  		reverseBytes(buf)
   143  	}
   144  	return b, buf, nil
   145  }
   146  
   147  // DecodeBytes decodes bytes which is encoded by EncodeBytes before,
   148  // returns the leftover bytes and decoded value if no error.
   149  // `buf` is used to buffer data to avoid the cost of makeslice in decodeBytes when DecodeBytes is called by Decoder.DecodeOne.
   150  func DecodeBytes(b []byte, buf []byte) ([]byte, []byte, error) {
   151  	return decodeBytes(b, buf, false)
   152  }
   153  
// See https://golang.org/src/crypto/cipher/xor.go
//
// wordSize is the size in bytes of a uintptr on the target platform.
const wordSize = int(unsafe.Sizeof(uintptr(0)))

// supportsUnaligned is true when the build target is 386 or amd64; reverseBytes
// uses it to choose between fastReverseBytes and safeReverseBytes.
// NOTE(review): presumably these are the architectures assumed to tolerate
// unaligned word access — confirm before adding others.
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64"
   157  
   158  func fastReverseBytes(b []byte) {
   159  	n := len(b)
   160  	w := n / wordSize
   161  	if w > 0 {
   162  		bw := *(*[]uintptr)(unsafe.Pointer(&b))
   163  		for i := 0; i < w; i++ {
   164  			bw[i] = ^bw[i]
   165  		}
   166  	}
   167  
   168  	for i := w * wordSize; i < n; i++ {
   169  		b[i] = ^b[i]
   170  	}
   171  }
   172  
   173  func safeReverseBytes(b []byte) {
   174  	for i := range b {
   175  		b[i] = ^b[i]
   176  	}
   177  }
   178  
   179  func reverseBytes(b []byte) {
   180  	if supportsUnaligned {
   181  		fastReverseBytes(b)
   182  		return
   183  	}
   184  
   185  	safeReverseBytes(b)
   186  }
   187  
   188  // EncodeIntToCmpUint make int v to comparable uint type
   189  func EncodeIntToCmpUint(v int64) uint64 {
   190  	return uint64(v) ^ signMask
   191  }
   192  
   193  // DecodeCmpUintToInt decodes the u that encoded by EncodeIntToCmpUint
   194  func DecodeCmpUintToInt(u uint64) int64 {
   195  	return int64(u ^ signMask)
   196  }
   197  
   198  // EncodeInt appends the encoded value to slice b and returns the appended slice.
   199  // EncodeInt guarantees that the encoded value is in ascending order for comparison.
   200  func EncodeInt(b []byte, v int64) []byte {
   201  	var data [8]byte
   202  	u := EncodeIntToCmpUint(v)
   203  	binary.BigEndian.PutUint64(data[:], u)
   204  	return append(b, data[:]...)
   205  }
   206  
   207  // EncodeIntDesc appends the encoded value to slice b and returns the appended slice.
   208  // EncodeIntDesc guarantees that the encoded value is in descending order for comparison.
   209  func EncodeIntDesc(b []byte, v int64) []byte {
   210  	var data [8]byte
   211  	u := EncodeIntToCmpUint(v)
   212  	binary.BigEndian.PutUint64(data[:], ^u)
   213  	return append(b, data[:]...)
   214  }
   215  
   216  // DecodeInt decodes value encoded by EncodeInt before.
   217  // It returns the leftover un-decoded slice, decoded value if no error.
   218  func DecodeInt(b []byte) ([]byte, int64, error) {
   219  	if len(b) < 8 {
   220  		return nil, 0, errors.New("insufficient bytes to decode value")
   221  	}
   222  
   223  	u := binary.BigEndian.Uint64(b[:8])
   224  	v := DecodeCmpUintToInt(u)
   225  	b = b[8:]
   226  	return b, v, nil
   227  }
   228  
   229  // DecodeIntDesc decodes value encoded by EncodeInt before.
   230  // It returns the leftover un-decoded slice, decoded value if no error.
   231  func DecodeIntDesc(b []byte) ([]byte, int64, error) {
   232  	if len(b) < 8 {
   233  		return nil, 0, errors.New("insufficient bytes to decode value")
   234  	}
   235  
   236  	u := binary.BigEndian.Uint64(b[:8])
   237  	v := DecodeCmpUintToInt(^u)
   238  	b = b[8:]
   239  	return b, v, nil
   240  }
   241  
   242  // EncodeUint appends the encoded value to slice b and returns the appended slice.
   243  // EncodeUint guarantees that the encoded value is in ascending order for comparison.
   244  func EncodeUint(b []byte, v uint64) []byte {
   245  	var data [8]byte
   246  	binary.BigEndian.PutUint64(data[:], v)
   247  	return append(b, data[:]...)
   248  }
   249  
   250  // DecodeUint decodes value encoded by EncodeUint before.
   251  // It returns the leftover un-decoded slice, decoded value if no error.
   252  func DecodeUint(b []byte) ([]byte, uint64, error) {
   253  	if len(b) < 8 {
   254  		return nil, 0, errors.New("insufficient bytes to decode value")
   255  	}
   256  
   257  	v := binary.BigEndian.Uint64(b[:8])
   258  	b = b[8:]
   259  	return b, v, nil
   260  }
   261  
   262  // DecodeUintDesc decodes value encoded by EncodeInt before.
   263  // It returns the leftover un-decoded slice, decoded value if no error.
   264  func DecodeUintDesc(b []byte) ([]byte, uint64, error) {
   265  	if len(b) < 8 {
   266  		return nil, 0, errors.New("insufficient bytes to decode value")
   267  	}
   268  
   269  	data := b[:8]
   270  	v := binary.BigEndian.Uint64(data)
   271  	b = b[8:]
   272  	return b, ^v, nil
   273  }
   274  
   275  // EncodeVarint appends the encoded value to slice b and returns the appended slice.
   276  // Note that the encoded result is not memcomparable.
   277  func EncodeVarint(b []byte, v int64) []byte {
   278  	var data [binary.MaxVarintLen64]byte
   279  	n := binary.PutVarint(data[:], v)
   280  	return append(b, data[:n]...)
   281  }
   282  
   283  // DecodeVarint decodes value encoded by EncodeVarint before.
   284  // It returns the leftover un-decoded slice, decoded value if no error.
   285  func DecodeVarint(b []byte) ([]byte, int64, error) {
   286  	v, n := binary.Varint(b)
   287  	if n > 0 {
   288  		return b[n:], v, nil
   289  	}
   290  	if n < 0 {
   291  		return nil, 0, errors.New("value larger than 64 bits")
   292  	}
   293  	return nil, 0, errors.New("insufficient bytes to decode value")
   294  }
   295  
   296  // EncodeUvarint appends the encoded value to slice b and returns the appended slice.
   297  // Note that the encoded result is not memcomparable.
   298  func EncodeUvarint(b []byte, v uint64) []byte {
   299  	var data [binary.MaxVarintLen64]byte
   300  	n := binary.PutUvarint(data[:], v)
   301  	return append(b, data[:n]...)
   302  }
   303  
   304  // DecodeUvarint decodes value encoded by EncodeUvarint before.
   305  // It returns the leftover un-decoded slice, decoded value if no error.
   306  func DecodeUvarint(b []byte) ([]byte, uint64, error) {
   307  	v, n := binary.Uvarint(b)
   308  	if n > 0 {
   309  		return b[n:], v, nil
   310  	}
   311  	if n < 0 {
   312  		return nil, 0, errors.New("value larger than 64 bits")
   313  	}
   314  	return nil, 0, errors.New("insufficient bytes to decode value")
   315  }
   316  
// Tag-byte layout used by the comparable varint encoders and decoders below.
// First bytes below negativeTagEnd introduce a negative value, first bytes
// above positiveTagStart introduce a multi-byte non-negative value, and first
// bytes in between carry a small non-negative value directly
// (value = first byte - negativeTagEnd).
const (
	negativeTagEnd   = 8        // negative tag is (negativeTagEnd - length).
	positiveTagStart = 0xff - 8 // Positive tag is (positiveTagStart + length).
)
   321  
   322  // EncodeComparableVarint encodes an int64 to a mem-comparable bytes.
   323  func EncodeComparableVarint(b []byte, v int64) []byte {
   324  	if v < 0 {
   325  		// All negative value has a tag byte prefix (negativeTagEnd - length).
   326  		// Smaller negative value encodes to more bytes, has smaller tag.
   327  		if v >= -0xff {
   328  			return append(b, negativeTagEnd-1, byte(v))
   329  		} else if v >= -0xffff {
   330  			return append(b, negativeTagEnd-2, byte(v>>8), byte(v))
   331  		} else if v >= -0xffffff {
   332  			return append(b, negativeTagEnd-3, byte(v>>16), byte(v>>8), byte(v))
   333  		} else if v >= -0xffffffff {
   334  			return append(b, negativeTagEnd-4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
   335  		} else if v >= -0xffffffffff {
   336  			return append(b, negativeTagEnd-5, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
   337  		} else if v >= -0xffffffffffff {
   338  			return append(b, negativeTagEnd-6, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8),
   339  				byte(v))
   340  		} else if v >= -0xffffffffffffff {
   341  			return append(b, negativeTagEnd-7, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16),
   342  				byte(v>>8), byte(v))
   343  		}
   344  		return append(b, negativeTagEnd-8, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24),
   345  			byte(v>>16), byte(v>>8), byte(v))
   346  	}
   347  	return EncodeComparableUvarint(b, uint64(v))
   348  }
   349  
   350  // EncodeComparableUvarint encodes uint64 into mem-comparable bytes.
   351  func EncodeComparableUvarint(b []byte, v uint64) []byte {
   352  	// The first byte has 256 values, [0, 7] is reserved for negative tags,
   353  	// [248, 255] is reserved for larger positive tags,
   354  	// So we can store value [0, 239] in a single byte.
   355  	// Values cannot be stored in single byte has a tag byte prefix (positiveTagStart+length).
   356  	// Larger value encodes to more bytes, has larger tag.
   357  	if v <= positiveTagStart-negativeTagEnd {
   358  		return append(b, byte(v)+negativeTagEnd)
   359  	} else if v <= 0xff {
   360  		return append(b, positiveTagStart+1, byte(v))
   361  	} else if v <= 0xffff {
   362  		return append(b, positiveTagStart+2, byte(v>>8), byte(v))
   363  	} else if v <= 0xffffff {
   364  		return append(b, positiveTagStart+3, byte(v>>16), byte(v>>8), byte(v))
   365  	} else if v <= 0xffffffff {
   366  		return append(b, positiveTagStart+4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
   367  	} else if v <= 0xffffffffff {
   368  		return append(b, positiveTagStart+5, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
   369  	} else if v <= 0xffffffffffff {
   370  		return append(b, positiveTagStart+6, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8),
   371  			byte(v))
   372  	} else if v <= 0xffffffffffffff {
   373  		return append(b, positiveTagStart+7, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16),
   374  			byte(v>>8), byte(v))
   375  	}
   376  	return append(b, positiveTagStart+8, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24),
   377  		byte(v>>16), byte(v>>8), byte(v))
   378  }
   379  
// Sentinel errors returned by the comparable varint decoders.
var (
	// errDecodeInsufficient reports that the input ended before a complete
	// value could be read.
	errDecodeInsufficient = errors.New("insufficient bytes to decode value")
	// errDecodeInvalid reports that the input bytes do not form a valid
	// value for the requested type.
	errDecodeInvalid      = errors.New("invalid bytes to decode value")
)
   384  
   385  // DecodeComparableUvarint decodes mem-comparable uvarint.
   386  func DecodeComparableUvarint(b []byte) ([]byte, uint64, error) {
   387  	if len(b) == 0 {
   388  		return nil, 0, errDecodeInsufficient
   389  	}
   390  	first := b[0]
   391  	b = b[1:]
   392  	if first < negativeTagEnd {
   393  		return nil, 0, errors.WithStack(errDecodeInvalid)
   394  	}
   395  	if first <= positiveTagStart {
   396  		return b, uint64(first) - negativeTagEnd, nil
   397  	}
   398  	length := int(first) - positiveTagStart
   399  	if len(b) < length {
   400  		return nil, 0, errors.WithStack(errDecodeInsufficient)
   401  	}
   402  	var v uint64
   403  	for _, c := range b[:length] {
   404  		v = (v << 8) | uint64(c)
   405  	}
   406  	return b[length:], v, nil
   407  }
   408  
   409  // DecodeComparableVarint decodes mem-comparable varint.
   410  func DecodeComparableVarint(b []byte) ([]byte, int64, error) {
   411  	if len(b) == 0 {
   412  		return nil, 0, errors.WithStack(errDecodeInsufficient)
   413  	}
   414  	first := b[0]
   415  	if first >= negativeTagEnd && first <= positiveTagStart {
   416  		return b, int64(first) - negativeTagEnd, nil
   417  	}
   418  	b = b[1:]
   419  	var length int
   420  	var v uint64
   421  	if first < negativeTagEnd {
   422  		length = negativeTagEnd - int(first)
   423  		v = math.MaxUint64 // negative value has all bits on by default.
   424  	} else {
   425  		length = int(first) - positiveTagStart
   426  	}
   427  	if len(b) < length {
   428  		return nil, 0, errors.WithStack(errDecodeInsufficient)
   429  	}
   430  	for _, c := range b[:length] {
   431  		v = (v << 8) | uint64(c)
   432  	}
   433  	if first > positiveTagStart && v > math.MaxInt64 {
   434  		return nil, 0, errors.WithStack(errDecodeInvalid)
   435  	} else if first < negativeTagEnd && v <= math.MaxInt64 {
   436  		return nil, 0, errors.WithStack(errDecodeInvalid)
   437  	}
   438  	return b[length:], int64(v), nil
   439  }
   440  
   441  // EncodedBytesLength returns the length of data after encoded
   442  func EncodedBytesLength(dataLen int) int {
   443  	mod := dataLen % encGroupSize
   444  	padCount := encGroupSize - mod
   445  	return dataLen + padCount + 1 + dataLen/encGroupSize
   446  }
   447  
   448  // EncodeBytesDesc first encodes bytes using EncodeBytes, then bitwise reverses
   449  // encoded value to guarantee the encoded value is in descending order for comparison.
   450  func EncodeBytesDesc(b []byte, data []byte) []byte {
   451  	n := len(b)
   452  	b = EncodeBytes(b, data)
   453  	reverseBytes(b[n:])
   454  	return b
   455  }
   456  
   457  // DecodeBytesDesc decodes bytes which is encoded by EncodeBytesDesc before,
   458  // returns the leftover bytes and decoded value if no error.
   459  func DecodeBytesDesc(b []byte, buf []byte) ([]byte, []byte, error) {
   460  	return decodeBytes(b, buf, true)
   461  }
   462  
   463  // EncodeBytesExt is an extension of `EncodeBytes`, which will not encode for `isRawKv = true` but just append `data` to `b`.
   464  func EncodeBytesExt(b []byte, data []byte, isRawKv bool) []byte {
   465  	if isRawKv {
   466  		return append(b, data...)
   467  	}
   468  	return EncodeBytes(b, data)
   469  }
   470  
   471  func encodeFloatToCmpUint64(f float64) uint64 {
   472  	u := math.Float64bits(f)
   473  	if f >= 0 {
   474  		u |= signMask
   475  	} else {
   476  		u = ^u
   477  	}
   478  	return u
   479  }
   480  
   481  func decodeCmpUintToFloat(u uint64) float64 {
   482  	if u&signMask > 0 {
   483  		u &= ^signMask
   484  	} else {
   485  		u = ^u
   486  	}
   487  	return math.Float64frombits(u)
   488  }
   489  
   490  // EncodeFloat encodes a float v into a byte slice which can be sorted lexicographically later.
   491  // EncodeFloat guarantees that the encoded value is in ascending order for comparison.
   492  func EncodeFloat(b []byte, v float64) []byte {
   493  	u := encodeFloatToCmpUint64(v)
   494  	return EncodeUint(b, u)
   495  }
   496  
   497  // DecodeFloat decodes a float from a byte slice generated with EncodeFloat before.
   498  func DecodeFloat(b []byte) ([]byte, float64, error) {
   499  	b, u, err := DecodeUint(b)
   500  	return b, decodeCmpUintToFloat(u), errors.Trace(err)
   501  }
   502  
   503  // EncodeFloatDesc encodes a float v into a byte slice which can be sorted lexicographically later.
   504  // EncodeFloatDesc guarantees that the encoded value is in descending order for comparison.
   505  func EncodeFloatDesc(b []byte, v float64) []byte {
   506  	u := encodeFloatToCmpUint64(v)
   507  	return EncodeUintDesc(b, u)
   508  }
   509  
   510  // DecodeFloatDesc decodes a float from a byte slice generated with EncodeFloatDesc before.
   511  func DecodeFloatDesc(b []byte) ([]byte, float64, error) {
   512  	b, u, err := DecodeUintDesc(b)
   513  	return b, decodeCmpUintToFloat(u), errors.Trace(err)
   514  }