github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/soliton/codec/bytes.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package codec
    15  
    16  import (
    17  	"encoding/binary"
    18  	"runtime"
    19  	"unsafe"
    20  
    21  	"github.com/whtcorpsinc/errors"
    22  )
    23  
    24  const (
    25  	encGroupSize = 8
    26  	encMarker    = byte(0xFF)
    27  	encPad       = byte(0x0)
    28  )
    29  
    30  var (
    31  	pads = make([]byte, encGroupSize)
    32  )
    33  
    34  // EncodeBytes guarantees the encoded value is in ascending order for comparison,
    35  // encoding with the following rule:
    36  //  [group1][marker1]...[groupN][markerN]
    37  //  group is 8 bytes slice which is padding with 0.
    38  //  marker is `0xFF - padding 0 count`
    39  // For example:
    40  //   [] -> [0, 0, 0, 0, 0, 0, 0, 0, 247]
    41  //   [1, 2, 3] -> [1, 2, 3, 0, 0, 0, 0, 0, 250]
    42  //   [1, 2, 3, 0] -> [1, 2, 3, 0, 0, 0, 0, 0, 251]
    43  //   [1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247]
    44  // Refer: https://github.com/facebook/allegrosql-5.6/wiki/MyRocks-record-format#memcomparable-format
    45  func EncodeBytes(b []byte, data []byte) []byte {
    46  	// Allocate more space to avoid unnecessary slice growing.
    47  	// Assume that the byte slice size is about `(len(data) / encGroupSize + 1) * (encGroupSize + 1)` bytes,
    48  	// that is `(len(data) / 8 + 1) * 9` in our implement.
    49  	dLen := len(data)
    50  	reallocSize := (dLen/encGroupSize + 1) * (encGroupSize + 1)
    51  	result := reallocBytes(b, reallocSize)
    52  	for idx := 0; idx <= dLen; idx += encGroupSize {
    53  		remain := dLen - idx
    54  		padCount := 0
    55  		if remain >= encGroupSize {
    56  			result = append(result, data[idx:idx+encGroupSize]...)
    57  		} else {
    58  			padCount = encGroupSize - remain
    59  			result = append(result, data[idx:]...)
    60  			result = append(result, pads[:padCount]...)
    61  		}
    62  
    63  		marker := encMarker - byte(padCount)
    64  		result = append(result, marker)
    65  	}
    66  
    67  	return result
    68  }
    69  
    70  // EncodedBytesLength returns the length of data after encoded
    71  func EncodedBytesLength(dataLen int) int {
    72  	mod := dataLen % encGroupSize
    73  	padCount := encGroupSize - mod
    74  	return dataLen + padCount + 1 + dataLen/encGroupSize
    75  }
    76  
    77  func decodeBytes(b []byte, buf []byte, reverse bool) ([]byte, []byte, error) {
    78  	if buf == nil {
    79  		buf = make([]byte, 0, len(b))
    80  	}
    81  	buf = buf[:0]
    82  	for {
    83  		if len(b) < encGroupSize+1 {
    84  			return nil, nil, errors.New("insufficient bytes to decode value")
    85  		}
    86  
    87  		groupBytes := b[:encGroupSize+1]
    88  
    89  		group := groupBytes[:encGroupSize]
    90  		marker := groupBytes[encGroupSize]
    91  
    92  		var padCount byte
    93  		if reverse {
    94  			padCount = marker
    95  		} else {
    96  			padCount = encMarker - marker
    97  		}
    98  		if padCount > encGroupSize {
    99  			return nil, nil, errors.Errorf("invalid marker byte, group bytes %q", groupBytes)
   100  		}
   101  
   102  		realGroupSize := encGroupSize - padCount
   103  		buf = append(buf, group[:realGroupSize]...)
   104  		b = b[encGroupSize+1:]
   105  
   106  		if padCount != 0 {
   107  			var padByte = encPad
   108  			if reverse {
   109  				padByte = encMarker
   110  			}
   111  			// Check validity of padding bytes.
   112  			for _, v := range group[realGroupSize:] {
   113  				if v != padByte {
   114  					return nil, nil, errors.Errorf("invalid padding byte, group bytes %q", groupBytes)
   115  				}
   116  			}
   117  			break
   118  		}
   119  	}
   120  	if reverse {
   121  		reverseBytes(buf)
   122  	}
   123  	return b, buf, nil
   124  }
   125  
   126  // DecodeBytes decodes bytes which is encoded by EncodeBytes before,
   127  // returns the leftover bytes and decoded value if no error.
   128  // `buf` is used to buffer data to avoid the cost of makeslice in decodeBytes when DecodeBytes is called by CausetDecoder.DecodeOne.
   129  func DecodeBytes(b []byte, buf []byte) ([]byte, []byte, error) {
   130  	return decodeBytes(b, buf, false)
   131  }
   132  
   133  // EncodeBytesDesc first encodes bytes using EncodeBytes, then bitwise reverses
   134  // encoded value to guarantee the encoded value is in descending order for comparison.
   135  func EncodeBytesDesc(b []byte, data []byte) []byte {
   136  	n := len(b)
   137  	b = EncodeBytes(b, data)
   138  	reverseBytes(b[n:])
   139  	return b
   140  }
   141  
   142  // DecodeBytesDesc decodes bytes which is encoded by EncodeBytesDesc before,
   143  // returns the leftover bytes and decoded value if no error.
   144  func DecodeBytesDesc(b []byte, buf []byte) ([]byte, []byte, error) {
   145  	return decodeBytes(b, buf, true)
   146  }
   147  
   148  // EncodeCompactBytes joins bytes with its length into a byte slice. It is more
   149  // efficient in both space and time compare to EncodeBytes. Note that the encoded
   150  // result is not memcomparable.
   151  func EncodeCompactBytes(b []byte, data []byte) []byte {
   152  	b = reallocBytes(b, binary.MaxVarintLen64+len(data))
   153  	b = EncodeVarint(b, int64(len(data)))
   154  	return append(b, data...)
   155  }
   156  
   157  // DecodeCompactBytes decodes bytes which is encoded by EncodeCompactBytes before.
   158  func DecodeCompactBytes(b []byte) ([]byte, []byte, error) {
   159  	b, n, err := DecodeVarint(b)
   160  	if err != nil {
   161  		return nil, nil, errors.Trace(err)
   162  	}
   163  	if int64(len(b)) < n {
   164  		return nil, nil, errors.Errorf("insufficient bytes to decode value, expected length: %v", n)
   165  	}
   166  	return b[n:], b[:n], nil
   167  }
   168  
   169  // See https://golang.org/src/crypto/cipher/xor.go
   170  const wordSize = int(unsafe.Sizeof(uintptr(0)))
   171  const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64"
   172  
   173  func fastReverseBytes(b []byte) {
   174  	n := len(b)
   175  	w := n / wordSize
   176  	if w > 0 {
   177  		bw := *(*[]uintptr)(unsafe.Pointer(&b))
   178  		for i := 0; i < w; i++ {
   179  			bw[i] = ^bw[i]
   180  		}
   181  	}
   182  
   183  	for i := w * wordSize; i < n; i++ {
   184  		b[i] = ^b[i]
   185  	}
   186  }
   187  
   188  func safeReverseBytes(b []byte) {
   189  	for i := range b {
   190  		b[i] = ^b[i]
   191  	}
   192  }
   193  
   194  func reverseBytes(b []byte) {
   195  	if supportsUnaligned {
   196  		fastReverseBytes(b)
   197  		return
   198  	}
   199  
   200  	safeReverseBytes(b)
   201  }
   202  
   203  // reallocBytes is like realloc.
   204  func reallocBytes(b []byte, n int) []byte {
   205  	newSize := len(b) + n
   206  	if cap(b) < newSize {
   207  		bs := make([]byte, len(b), newSize)
   208  		copy(bs, b)
   209  		return bs
   210  	}
   211  
   212  	// slice b has capability to causetstore n bytes
   213  	return b
   214  }