github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/soliton/codec/bytes.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package codec 15 16 import ( 17 "encoding/binary" 18 "runtime" 19 "unsafe" 20 21 "github.com/whtcorpsinc/errors" 22 ) 23 24 const ( 25 encGroupSize = 8 26 encMarker = byte(0xFF) 27 encPad = byte(0x0) 28 ) 29 30 var ( 31 pads = make([]byte, encGroupSize) 32 ) 33 34 // EncodeBytes guarantees the encoded value is in ascending order for comparison, 35 // encoding with the following rule: 36 // [group1][marker1]...[groupN][markerN] 37 // group is 8 bytes slice which is padding with 0. 38 // marker is `0xFF - padding 0 count` 39 // For example: 40 // [] -> [0, 0, 0, 0, 0, 0, 0, 0, 247] 41 // [1, 2, 3] -> [1, 2, 3, 0, 0, 0, 0, 0, 250] 42 // [1, 2, 3, 0] -> [1, 2, 3, 0, 0, 0, 0, 0, 251] 43 // [1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247] 44 // Refer: https://github.com/facebook/allegrosql-5.6/wiki/MyRocks-record-format#memcomparable-format 45 func EncodeBytes(b []byte, data []byte) []byte { 46 // Allocate more space to avoid unnecessary slice growing. 47 // Assume that the byte slice size is about `(len(data) / encGroupSize + 1) * (encGroupSize + 1)` bytes, 48 // that is `(len(data) / 8 + 1) * 9` in our implement. 49 dLen := len(data) 50 reallocSize := (dLen/encGroupSize + 1) * (encGroupSize + 1) 51 result := reallocBytes(b, reallocSize) 52 for idx := 0; idx <= dLen; idx += encGroupSize { 53 remain := dLen - idx 54 padCount := 0 55 if remain >= encGroupSize { 56 result = append(result, data[idx:idx+encGroupSize]...) 57 } else { 58 padCount = encGroupSize - remain 59 result = append(result, data[idx:]...) 60 result = append(result, pads[:padCount]...) 61 } 62 63 marker := encMarker - byte(padCount) 64 result = append(result, marker) 65 } 66 67 return result 68 } 69 70 // EncodedBytesLength returns the length of data after encoded 71 func EncodedBytesLength(dataLen int) int { 72 mod := dataLen % encGroupSize 73 padCount := encGroupSize - mod 74 return dataLen + padCount + 1 + dataLen/encGroupSize 75 } 76 77 func decodeBytes(b []byte, buf []byte, reverse bool) ([]byte, []byte, error) { 78 if buf == nil { 79 buf = make([]byte, 0, len(b)) 80 } 81 buf = buf[:0] 82 for { 83 if len(b) < encGroupSize+1 { 84 return nil, nil, errors.New("insufficient bytes to decode value") 85 } 86 87 groupBytes := b[:encGroupSize+1] 88 89 group := groupBytes[:encGroupSize] 90 marker := groupBytes[encGroupSize] 91 92 var padCount byte 93 if reverse { 94 padCount = marker 95 } else { 96 padCount = encMarker - marker 97 } 98 if padCount > encGroupSize { 99 return nil, nil, errors.Errorf("invalid marker byte, group bytes %q", groupBytes) 100 } 101 102 realGroupSize := encGroupSize - padCount 103 buf = append(buf, group[:realGroupSize]...) 104 b = b[encGroupSize+1:] 105 106 if padCount != 0 { 107 var padByte = encPad 108 if reverse { 109 padByte = encMarker 110 } 111 // Check validity of padding bytes. 112 for _, v := range group[realGroupSize:] { 113 if v != padByte { 114 return nil, nil, errors.Errorf("invalid padding byte, group bytes %q", groupBytes) 115 } 116 } 117 break 118 } 119 } 120 if reverse { 121 reverseBytes(buf) 122 } 123 return b, buf, nil 124 } 125 126 // DecodeBytes decodes bytes which is encoded by EncodeBytes before, 127 // returns the leftover bytes and decoded value if no error. 128 // `buf` is used to buffer data to avoid the cost of makeslice in decodeBytes when DecodeBytes is called by CausetDecoder.DecodeOne. 129 func DecodeBytes(b []byte, buf []byte) ([]byte, []byte, error) { 130 return decodeBytes(b, buf, false) 131 } 132 133 // EncodeBytesDesc first encodes bytes using EncodeBytes, then bitwise reverses 134 // encoded value to guarantee the encoded value is in descending order for comparison. 135 func EncodeBytesDesc(b []byte, data []byte) []byte { 136 n := len(b) 137 b = EncodeBytes(b, data) 138 reverseBytes(b[n:]) 139 return b 140 } 141 142 // DecodeBytesDesc decodes bytes which is encoded by EncodeBytesDesc before, 143 // returns the leftover bytes and decoded value if no error. 144 func DecodeBytesDesc(b []byte, buf []byte) ([]byte, []byte, error) { 145 return decodeBytes(b, buf, true) 146 } 147 148 // EncodeCompactBytes joins bytes with its length into a byte slice. It is more 149 // efficient in both space and time compare to EncodeBytes. Note that the encoded 150 // result is not memcomparable. 151 func EncodeCompactBytes(b []byte, data []byte) []byte { 152 b = reallocBytes(b, binary.MaxVarintLen64+len(data)) 153 b = EncodeVarint(b, int64(len(data))) 154 return append(b, data...) 155 } 156 157 // DecodeCompactBytes decodes bytes which is encoded by EncodeCompactBytes before. 158 func DecodeCompactBytes(b []byte) ([]byte, []byte, error) { 159 b, n, err := DecodeVarint(b) 160 if err != nil { 161 return nil, nil, errors.Trace(err) 162 } 163 if int64(len(b)) < n { 164 return nil, nil, errors.Errorf("insufficient bytes to decode value, expected length: %v", n) 165 } 166 return b[n:], b[:n], nil 167 } 168 169 // See https://golang.org/src/crypto/cipher/xor.go 170 const wordSize = int(unsafe.Sizeof(uintptr(0))) 171 const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" 172 173 func fastReverseBytes(b []byte) { 174 n := len(b) 175 w := n / wordSize 176 if w > 0 { 177 bw := *(*[]uintptr)(unsafe.Pointer(&b)) 178 for i := 0; i < w; i++ { 179 bw[i] = ^bw[i] 180 } 181 } 182 183 for i := w * wordSize; i < n; i++ { 184 b[i] = ^b[i] 185 } 186 } 187 188 func safeReverseBytes(b []byte) { 189 for i := range b { 190 b[i] = ^b[i] 191 } 192 } 193 194 func reverseBytes(b []byte) { 195 if supportsUnaligned { 196 fastReverseBytes(b) 197 return 198 } 199 200 safeReverseBytes(b) 201 } 202 203 // reallocBytes is like realloc. 204 func reallocBytes(b []byte, n int) []byte { 205 newSize := len(b) + n 206 if cap(b) < newSize { 207 bs := make([]byte, len(b), newSize) 208 copy(bs, b) 209 return bs 210 } 211 212 // slice b has capability to causetstore n bytes 213 return b 214 }