github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/sqlparse/tidbparser/dependency/util/codec/bytes.go (about)

     1  // Copyright 2015 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package codec
    15  
    16  import (
    17  	"encoding/binary"
    18  	"runtime"
    19  	"unsafe"
    20  
    21  	"github.com/juju/errors"
    22  )
    23  
    24  const (
    25  	encGroupSize = 8
    26  	encMarker    = byte(0xFF)
    27  	encPad       = byte(0x0)
    28  )
    29  
    30  var (
    31  	pads    = make([]byte, encGroupSize)
    32  	encPads = []byte{encPad}
    33  )
    34  
    35  // EncodeBytes guarantees the encoded value is in ascending order for comparison,
    36  // encoding with the following rule:
    37  //
    38  //	[group1][marker1]...[groupN][markerN]
    39  //	group is 8 bytes slice which is padding with 0.
    40  //	marker is `0xFF - padding 0 count`
    41  //
    42  // For example:
    43  //
    44  //	[] -> [0, 0, 0, 0, 0, 0, 0, 0, 247]
    45  //	[1, 2, 3] -> [1, 2, 3, 0, 0, 0, 0, 0, 250]
    46  //	[1, 2, 3, 0] -> [1, 2, 3, 0, 0, 0, 0, 0, 251]
    47  //	[1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247]
    48  //
    49  // Refer: https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format
    50  func EncodeBytes(b []byte, data []byte) []byte {
    51  	// Allocate more space to avoid unnecessary slice growing.
    52  	// Assume that the byte slice size is about `(len(data) / encGroupSize + 1) * (encGroupSize + 1)` bytes,
    53  	// that is `(len(data) / 8 + 1) * 9` in our implement.
    54  	dLen := len(data)
    55  	reallocSize := (dLen/encGroupSize + 1) * (encGroupSize + 1)
    56  	result := reallocBytes(b, reallocSize)
    57  	for idx := 0; idx <= dLen; idx += encGroupSize {
    58  		remain := dLen - idx
    59  		padCount := 0
    60  		if remain >= encGroupSize {
    61  			result = append(result, data[idx:idx+encGroupSize]...)
    62  		} else {
    63  			padCount = encGroupSize - remain
    64  			result = append(result, data[idx:]...)
    65  			result = append(result, pads[:padCount]...)
    66  		}
    67  
    68  		marker := encMarker - byte(padCount)
    69  		result = append(result, marker)
    70  	}
    71  
    72  	return result
    73  }
    74  
    75  func decodeBytes(b []byte, reverse bool) ([]byte, []byte, error) {
    76  	data := make([]byte, 0, len(b))
    77  	for {
    78  		if len(b) < encGroupSize+1 {
    79  			return nil, nil, errors.New("insufficient bytes to decode value")
    80  		}
    81  
    82  		groupBytes := b[:encGroupSize+1]
    83  
    84  		group := groupBytes[:encGroupSize]
    85  		marker := groupBytes[encGroupSize]
    86  
    87  		var padCount byte
    88  		if reverse {
    89  			padCount = marker
    90  		} else {
    91  			padCount = encMarker - marker
    92  		}
    93  		if padCount > encGroupSize {
    94  			return nil, nil, errors.Errorf("invalid marker byte, group bytes %q", groupBytes)
    95  		}
    96  
    97  		realGroupSize := encGroupSize - padCount
    98  		data = append(data, group[:realGroupSize]...)
    99  		b = b[encGroupSize+1:]
   100  
   101  		if padCount != 0 {
   102  			padByte := encPad
   103  			if reverse {
   104  				padByte = encMarker
   105  			}
   106  			// Check validity of padding bytes.
   107  			for _, v := range group[realGroupSize:] {
   108  				if v != padByte {
   109  					return nil, nil, errors.Errorf("invalid padding byte, group bytes %q", groupBytes)
   110  				}
   111  			}
   112  			break
   113  		}
   114  	}
   115  	if reverse {
   116  		reverseBytes(data)
   117  	}
   118  	return b, data, nil
   119  }
   120  
   121  // DecodeBytes decodes bytes which is encoded by EncodeBytes before,
   122  // returns the leftover bytes and decoded value if no error.
   123  func DecodeBytes(b []byte) ([]byte, []byte, error) {
   124  	return decodeBytes(b, false)
   125  }
   126  
   127  // EncodeBytesDesc first encodes bytes using EncodeBytes, then bitwise reverses
   128  // encoded value to guarantee the encoded value is in descending order for comparison.
   129  func EncodeBytesDesc(b []byte, data []byte) []byte {
   130  	n := len(b)
   131  	b = EncodeBytes(b, data)
   132  	reverseBytes(b[n:])
   133  	return b
   134  }
   135  
   136  // DecodeBytesDesc decodes bytes which is encoded by EncodeBytesDesc before,
   137  // returns the leftover bytes and decoded value if no error.
   138  func DecodeBytesDesc(b []byte) ([]byte, []byte, error) {
   139  	return decodeBytes(b, true)
   140  }
   141  
   142  // EncodeCompactBytes joins bytes with its length into a byte slice. It is more
   143  // efficient in both space and time compare to EncodeBytes. Note that the encoded
   144  // result is not memcomparable.
   145  func EncodeCompactBytes(b []byte, data []byte) []byte {
   146  	b = reallocBytes(b, binary.MaxVarintLen64+len(data))
   147  	b = EncodeVarint(b, int64(len(data)))
   148  	return append(b, data...)
   149  }
   150  
   151  // DecodeCompactBytes decodes bytes which is encoded by EncodeCompactBytes before.
   152  func DecodeCompactBytes(b []byte) ([]byte, []byte, error) {
   153  	b, n, err := DecodeVarint(b)
   154  	if err != nil {
   155  		return nil, nil, errors.Trace(err)
   156  	}
   157  	if int64(len(b)) < n {
   158  		return nil, nil, errors.Errorf("insufficient bytes to decode value, expected length: %v", n)
   159  	}
   160  	return b[n:], b[:n], nil
   161  }
   162  
   163  // See https://golang.org/src/crypto/cipher/xor.go
   164  const (
   165  	wordSize          = int(unsafe.Sizeof(uintptr(0)))
   166  	supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64"
   167  )
   168  
   169  func fastReverseBytes(b []byte) {
   170  	n := len(b)
   171  	w := n / wordSize
   172  	if w > 0 {
   173  		bw := *(*[]uintptr)(unsafe.Pointer(&b))
   174  		for i := 0; i < w; i++ {
   175  			bw[i] = ^bw[i]
   176  		}
   177  	}
   178  
   179  	for i := w * wordSize; i < n; i++ {
   180  		b[i] = ^b[i]
   181  	}
   182  }
   183  
   184  func safeReverseBytes(b []byte) {
   185  	for i := range b {
   186  		b[i] = ^b[i]
   187  	}
   188  }
   189  
   190  func reverseBytes(b []byte) {
   191  	if supportsUnaligned {
   192  		fastReverseBytes(b)
   193  		return
   194  	}
   195  
   196  	safeReverseBytes(b)
   197  }
   198  
   199  // reallocBytes is like realloc.
   200  func reallocBytes(b []byte, n int) []byte {
   201  	newSize := len(b) + n
   202  	if cap(b) < newSize {
   203  		bs := make([]byte, len(b), newSize)
   204  		copy(bs, b)
   205  		return bs
   206  	}
   207  
   208  	// slice b has capability to store n bytes
   209  	return b
   210  }