github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/soliton/chunk/codec.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package chunk 15 16 import ( 17 "encoding/binary" 18 "reflect" 19 "unsafe" 20 21 "github.com/cznic/mathutil" 22 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 23 "github.com/whtcorpsinc/milevadb/types" 24 ) 25 26 // Codec is used to: 27 // 1. encode a Chunk to a byte slice. 28 // 2. decode a Chunk from a byte slice. 29 type Codec struct { 30 // defCausTypes is used to check whether a DeferredCauset is fixed sized and what the 31 // fixed size for every element. 32 // NOTE: It's only used for decoding. 33 defCausTypes []*types.FieldType 34 } 35 36 // NewCodec creates a new Codec object for encode or decode a Chunk. 37 func NewCodec(defCausTypes []*types.FieldType) *Codec { 38 return &Codec{defCausTypes} 39 } 40 41 // Encode encodes a Chunk to a byte slice. 42 func (c *Codec) Encode(chk *Chunk) []byte { 43 buffer := make([]byte, 0, chk.MemoryUsage()) 44 for _, defCaus := range chk.defCausumns { 45 buffer = c.encodeDeferredCauset(buffer, defCaus) 46 } 47 return buffer 48 } 49 50 func (c *Codec) encodeDeferredCauset(buffer []byte, defCaus *DeferredCauset) []byte { 51 var lenBuffer [4]byte 52 // encode length. 53 binary.LittleEndian.PutUint32(lenBuffer[:], uint32(defCaus.length)) 54 buffer = append(buffer, lenBuffer[:4]...) 55 56 // encode nullCount. 57 binary.LittleEndian.PutUint32(lenBuffer[:], uint32(defCaus.nullCount())) 58 buffer = append(buffer, lenBuffer[:4]...) 59 60 // encode nullBitmap. 61 if defCaus.nullCount() > 0 { 62 numNullBitmapBytes := (defCaus.length + 7) / 8 63 buffer = append(buffer, defCaus.nullBitmap[:numNullBitmapBytes]...) 64 } 65 66 // encode offsets. 67 if !defCaus.isFixed() { 68 numOffsetBytes := (defCaus.length + 1) * 8 69 offsetBytes := i64SliceToBytes(defCaus.offsets) 70 buffer = append(buffer, offsetBytes[:numOffsetBytes]...) 71 } 72 73 // encode data. 74 buffer = append(buffer, defCaus.data...) 75 return buffer 76 } 77 78 func i64SliceToBytes(i64s []int64) (b []byte) { 79 if len(i64s) == 0 { 80 return nil 81 } 82 hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b)) 83 hdr.Len = len(i64s) * 8 84 hdr.Cap = hdr.Len 85 hdr.Data = uintptr(unsafe.Pointer(&i64s[0])) 86 return b 87 } 88 89 // Decode decodes a Chunk from a byte slice, return the remained unused bytes. 90 func (c *Codec) Decode(buffer []byte) (*Chunk, []byte) { 91 chk := &Chunk{} 92 for ordinal := 0; len(buffer) > 0; ordinal++ { 93 defCaus := &DeferredCauset{} 94 buffer = c.decodeDeferredCauset(buffer, defCaus, ordinal) 95 chk.defCausumns = append(chk.defCausumns, defCaus) 96 } 97 return chk, buffer 98 } 99 100 // DecodeToChunk decodes a Chunk from a byte slice, return the remained unused bytes. 101 func (c *Codec) DecodeToChunk(buffer []byte, chk *Chunk) (remained []byte) { 102 for i := 0; i < len(chk.defCausumns); i++ { 103 buffer = c.decodeDeferredCauset(buffer, chk.defCausumns[i], i) 104 } 105 return buffer 106 } 107 108 // decodeDeferredCauset decodes a DeferredCauset from a byte slice, return the remained unused bytes. 109 func (c *Codec) decodeDeferredCauset(buffer []byte, defCaus *DeferredCauset, ordinal int) (remained []byte) { 110 // Todo(Shenghui Wu): Optimize all data is null. 111 // decode length. 112 defCaus.length = int(binary.LittleEndian.Uint32(buffer)) 113 buffer = buffer[4:] 114 115 // decode nullCount. 116 nullCount := int(binary.LittleEndian.Uint32(buffer)) 117 buffer = buffer[4:] 118 119 // decode nullBitmap. 120 if nullCount > 0 { 121 numNullBitmapBytes := (defCaus.length + 7) / 8 122 defCaus.nullBitmap = buffer[:numNullBitmapBytes:numNullBitmapBytes] 123 buffer = buffer[numNullBitmapBytes:] 124 } else { 125 c.setAllNotNull(defCaus) 126 } 127 128 // decode offsets. 129 numFixedBytes := getFixedLen(c.defCausTypes[ordinal]) 130 numDataBytes := int64(numFixedBytes * defCaus.length) 131 if numFixedBytes == -1 { 132 numOffsetBytes := (defCaus.length + 1) * 8 133 defCaus.offsets = bytesToI64Slice(buffer[:numOffsetBytes:numOffsetBytes]) 134 buffer = buffer[numOffsetBytes:] 135 numDataBytes = defCaus.offsets[defCaus.length] 136 } else if cap(defCaus.elemBuf) < numFixedBytes { 137 defCaus.elemBuf = make([]byte, numFixedBytes) 138 } 139 140 // decode data. 141 defCaus.data = buffer[:numDataBytes:numDataBytes] 142 return buffer[numDataBytes:] 143 } 144 145 var allNotNullBitmap [128]byte 146 147 func (c *Codec) setAllNotNull(defCaus *DeferredCauset) { 148 numNullBitmapBytes := (defCaus.length + 7) / 8 149 defCaus.nullBitmap = defCaus.nullBitmap[:0] 150 for i := 0; i < numNullBitmapBytes; { 151 numAppendBytes := mathutil.Min(numNullBitmapBytes-i, cap(allNotNullBitmap)) 152 defCaus.nullBitmap = append(defCaus.nullBitmap, allNotNullBitmap[:numAppendBytes]...) 153 i += numAppendBytes 154 } 155 } 156 157 func bytesToI64Slice(b []byte) (i64s []int64) { 158 if len(b) == 0 { 159 return nil 160 } 161 hdr := (*reflect.SliceHeader)(unsafe.Pointer(&i64s)) 162 hdr.Len = len(b) / 8 163 hdr.Cap = hdr.Len 164 hdr.Data = uintptr(unsafe.Pointer(&b[0])) 165 return i64s 166 } 167 168 // varElemLen indicates this DeferredCauset is a variable length DeferredCauset. 169 const varElemLen = -1 170 171 func getFixedLen(defCausType *types.FieldType) int { 172 switch defCausType.Tp { 173 case allegrosql.TypeFloat: 174 return 4 175 case allegrosql.TypeTiny, allegrosql.TypeShort, allegrosql.TypeInt24, allegrosql.TypeLong, 176 allegrosql.TypeLonglong, allegrosql.TypeDouble, allegrosql.TypeYear, allegrosql.TypeDuration: 177 return 8 178 case allegrosql.TypeDate, allegrosql.TypeDatetime, allegrosql.TypeTimestamp: 179 return sizeTime 180 case allegrosql.TypeNewDecimal: 181 return types.MyDecimalStructSize 182 default: 183 return varElemLen 184 } 185 } 186 187 // GetFixedLen get the memory size of a fixed-length type. 188 // if defCausType is not fixed-length, it returns varElemLen, aka -1. 189 func GetFixedLen(defCausType *types.FieldType) int { 190 return getFixedLen(defCausType) 191 } 192 193 // EstimateTypeWidth estimates the average width of values of the type. 194 // This is used by the causet, which doesn't require absolutely correct results; 195 // it's OK (and expected) to guess if we don't know for sure. 196 // 197 // mostly study from https://github.com/postgres/postgres/blob/REL_12_STABLE/src/backend/utils/cache/lsyscache.c#L2356 198 func EstimateTypeWidth(defCausType *types.FieldType) int { 199 defCausLen := getFixedLen(defCausType) 200 // Easy if it's a fixed-width type 201 if defCausLen != varElemLen { 202 return defCausLen 203 } 204 205 defCausLen = defCausType.Flen 206 if defCausLen > 0 { 207 if defCausLen <= 32 { 208 return defCausLen 209 } 210 if defCausLen < 1000 { 211 return 32 + (defCausLen-32)/2 // assume 50% 212 } 213 /* 214 * Beyond 1000, assume we're looking at something like 215 * "varchar(10000)" where the limit isn't actually reached often, and 216 * use a fixed estimate. 217 */ 218 return 32 + (1000-32)/2 219 } 220 // Oops, we have no idea ... wild guess time. 221 return 32 222 } 223 224 func init() { 225 for i := 0; i < 128; i++ { 226 allNotNullBitmap[i] = 0xFF 227 } 228 } 229 230 // CausetDecoder decodes the data returned from the interlock and stores the result in Chunk. 231 // How CausetDecoder works: 232 // 1. Initialization phase: Decode a whole input byte slice to CausetDecoder.intermChk(intermediate chunk) using Codec.Decode. 233 // intermChk is introduced to simplify the implementation of decode phase. This phase uses pointer operations with 234 // less CPU and memory cost. 235 // 2. Decode phase: 236 // 2.1 Set the number of rows to be decoded to a value that is a multiple of 8 and greater than 237 // `chk.RequiredRows() - chk.NumRows()`. This reduces the overhead of copying the srcDefCaus.nullBitMap into 238 // destDefCaus.nullBitMap. 239 // 2.2 Append srcDefCaus.offsets to destDefCaus.offsets when the elements is of var-length type. And further adjust the 240 // offsets according to descDefCaus.offsets[destDefCaus.length]-srcDefCaus.offsets[0]. 241 // 2.3 Append srcDefCaus.nullBitMap to destDefCaus.nullBitMap. 242 // 3. Go to step 1 when the input byte slice is consumed. 243 type CausetDecoder struct { 244 intermChk *Chunk 245 codec *Codec 246 remainedRows int 247 } 248 249 // NewCausetDecoder creates a new CausetDecoder object for decode a Chunk. 250 func NewCausetDecoder(chk *Chunk, defCausTypes []*types.FieldType) *CausetDecoder { 251 return &CausetDecoder{intermChk: chk, codec: NewCodec(defCausTypes), remainedRows: 0} 252 } 253 254 // Decode decodes multiple rows of CausetDecoder.intermChk and stores the result in chk. 255 func (c *CausetDecoder) Decode(chk *Chunk) { 256 requiredRows := chk.RequiredRows() - chk.NumRows() 257 // Set the requiredRows to a multiple of 8. 258 requiredRows = (requiredRows + 7) >> 3 << 3 259 if requiredRows > c.remainedRows { 260 requiredRows = c.remainedRows 261 } 262 for i := 0; i < chk.NumDefCauss(); i++ { 263 c.decodeDeferredCauset(chk, i, requiredRows) 264 } 265 c.remainedRows -= requiredRows 266 } 267 268 // Reset decodes data and causetstore the result in CausetDecoder.intermChk. This decode phase uses pointer operations with less 269 // CPU and memory costs. 270 func (c *CausetDecoder) Reset(data []byte) { 271 c.codec.DecodeToChunk(data, c.intermChk) 272 c.remainedRows = c.intermChk.NumRows() 273 } 274 275 // IsFinished indicates whether CausetDecoder.intermChk has been dried up. 276 func (c *CausetDecoder) IsFinished() bool { 277 return c.remainedRows == 0 278 } 279 280 // RemainedRows indicates CausetDecoder.intermChk has remained rows. 281 func (c *CausetDecoder) RemainedRows() int { 282 return c.remainedRows 283 } 284 285 // ReuseIntermChk swaps `CausetDecoder.intermChk` with `chk` directly when `CausetDecoder.intermChk.NumRows()` is no less 286 // than `chk.requiredRows * factor` where `factor` is 0.8 now. This can avoid the overhead of appending the 287 // data from `CausetDecoder.intermChk` to `chk`. Moreover, the defCausumn.offsets needs to be further adjusted 288 // according to defCausumn.offset[0]. 289 func (c *CausetDecoder) ReuseIntermChk(chk *Chunk) { 290 for i, defCaus := range c.intermChk.defCausumns { 291 defCaus.length = c.remainedRows 292 elemLen := getFixedLen(c.codec.defCausTypes[i]) 293 if elemLen == varElemLen { 294 // For var-length types, we need to adjust the offsets before reuse. 295 if deltaOffset := defCaus.offsets[0]; deltaOffset != 0 { 296 for j := 0; j < len(defCaus.offsets); j++ { 297 defCaus.offsets[j] -= deltaOffset 298 } 299 } 300 } 301 } 302 chk.SwapDeferredCausets(c.intermChk) 303 c.remainedRows = 0 304 } 305 306 func (c *CausetDecoder) decodeDeferredCauset(chk *Chunk, ordinal int, requiredRows int) { 307 elemLen := getFixedLen(c.codec.defCausTypes[ordinal]) 308 numDataBytes := int64(elemLen * requiredRows) 309 srcDefCaus := c.intermChk.defCausumns[ordinal] 310 destDefCaus := chk.defCausumns[ordinal] 311 312 if elemLen == varElemLen { 313 // For var-length types, we need to adjust the offsets after appending to destDefCaus. 314 numDataBytes = srcDefCaus.offsets[requiredRows] - srcDefCaus.offsets[0] 315 deltaOffset := destDefCaus.offsets[destDefCaus.length] - srcDefCaus.offsets[0] 316 destDefCaus.offsets = append(destDefCaus.offsets, srcDefCaus.offsets[1:requiredRows+1]...) 317 for i := destDefCaus.length + 1; i <= destDefCaus.length+requiredRows; i++ { 318 destDefCaus.offsets[i] = destDefCaus.offsets[i] + deltaOffset 319 } 320 srcDefCaus.offsets = srcDefCaus.offsets[requiredRows:] 321 } 322 323 numNullBitmapBytes := (requiredRows + 7) >> 3 324 if destDefCaus.length%8 == 0 { 325 destDefCaus.nullBitmap = append(destDefCaus.nullBitmap, srcDefCaus.nullBitmap[:numNullBitmapBytes]...) 326 } else { 327 destDefCaus.appendMultiSameNullBitmap(false, requiredRows) 328 bitMapLen := len(destDefCaus.nullBitmap) 329 // bitOffset indicates the number of valid bits in destDefCaus.nullBitmap's last byte. 330 bitOffset := destDefCaus.length % 8 331 startIdx := (destDefCaus.length - 1) >> 3 332 for i := 0; i < numNullBitmapBytes; i++ { 333 destDefCaus.nullBitmap[startIdx+i] |= srcDefCaus.nullBitmap[i] << bitOffset 334 // The high order 8-bitOffset bits in `srcDefCaus.nullBitmap[i]` should be appended to the low order of the next slot. 335 if startIdx+i+1 < bitMapLen { 336 destDefCaus.nullBitmap[startIdx+i+1] |= srcDefCaus.nullBitmap[i] >> (8 - bitOffset) 337 } 338 } 339 } 340 // Set all the redundant bits in the last slot of destDefCaus.nullBitmap to 0. 341 numRedundantBits := uint(len(destDefCaus.nullBitmap)*8 - destDefCaus.length - requiredRows) 342 bitMask := byte(1<<(8-numRedundantBits)) - 1 343 destDefCaus.nullBitmap[len(destDefCaus.nullBitmap)-1] &= bitMask 344 345 srcDefCaus.nullBitmap = srcDefCaus.nullBitmap[numNullBitmapBytes:] 346 destDefCaus.length += requiredRows 347 348 destDefCaus.data = append(destDefCaus.data, srcDefCaus.data[:numDataBytes]...) 349 srcDefCaus.data = srcDefCaus.data[numDataBytes:] 350 }