github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/codec/codec.go (about) 1 // Copyright 2022 zGraph Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package codec 16 17 import ( 18 "encoding/binary" 19 "math" 20 "runtime" 21 "unsafe" 22 23 "github.com/pingcap/errors" 24 ) 25 26 const ( 27 encGroupSize = 8 28 encMarker = byte(0xFF) 29 encPad = byte(0x0) 30 signMask uint64 = 0x8000000000000000 31 ) 32 33 var ( 34 pads = make([]byte, encGroupSize) 35 ) 36 37 // reallocBytes is like realloc. 38 func reallocBytes(b []byte, n int) []byte { 39 newSize := len(b) + n 40 if cap(b) < newSize { 41 bs := make([]byte, len(b), newSize) 42 copy(bs, b) 43 return bs 44 } 45 46 // slice b has capability to store n bytes 47 return b 48 } 49 50 // EncodeBytes guarantees the encoded value is in ascending order for comparison, 51 // encoding with the following rule: 52 // 53 // [group1][marker1]...[groupN][markerN] 54 // group is 8 bytes slice which is padding with 0. 
55 // marker is `0xFF - padding 0 count` 56 // 57 // For example: 58 // 59 // [] -> [0, 0, 0, 0, 0, 0, 0, 0, 247] 60 // [1, 2, 3] -> [1, 2, 3, 0, 0, 0, 0, 0, 250] 61 // [1, 2, 3, 0] -> [1, 2, 3, 0, 0, 0, 0, 0, 251] 62 // [1, 2, 3, 4, 5, 6, 7, 8] -> [1, 2, 3, 4, 5, 6, 7, 8, 255, 0, 0, 0, 0, 0, 0, 0, 0, 247] 63 // 64 // Refer: https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format 65 func EncodeBytes(b []byte, data []byte) []byte { 66 // Allocate more space to avoid unnecessary slice growing. 67 // Assume that the byte slice size is about `(len(data) / encGroupSize + 1) * (encGroupSize + 1)` bytes, 68 // that is `(len(data) / 8 + 1) * 9` in our implement. 69 dLen := len(data) 70 reallocSize := (dLen/encGroupSize + 1) * (encGroupSize + 1) 71 result := reallocBytes(b, reallocSize) 72 for idx := 0; idx <= dLen; idx += encGroupSize { 73 remain := dLen - idx 74 padCount := 0 75 if remain >= encGroupSize { 76 result = append(result, data[idx:idx+encGroupSize]...) 77 } else { 78 padCount = encGroupSize - remain 79 result = append(result, data[idx:]...) 80 result = append(result, pads[:padCount]...) 81 } 82 83 marker := encMarker - byte(padCount) 84 result = append(result, marker) 85 } 86 87 return result 88 } 89 90 // EncodeUintDesc appends the encoded value to slice b and returns the appended slice. 91 // EncodeUintDesc guarantees that the encoded value is in descending order for comparison. 92 func EncodeUintDesc(b []byte, v uint64) []byte { 93 var data [8]byte 94 binary.BigEndian.PutUint64(data[:], ^v) 95 return append(b, data[:]...) 
96 } 97 98 func decodeBytes(b []byte, buf []byte, reverse bool) ([]byte, []byte, error) { 99 if buf == nil { 100 buf = make([]byte, 0, len(b)) 101 } 102 buf = buf[:0] 103 for { 104 if len(b) < encGroupSize+1 { 105 return nil, nil, errors.New("insufficient bytes to decode value") 106 } 107 108 groupBytes := b[:encGroupSize+1] 109 110 group := groupBytes[:encGroupSize] 111 marker := groupBytes[encGroupSize] 112 113 var padCount byte 114 if reverse { 115 padCount = marker 116 } else { 117 padCount = encMarker - marker 118 } 119 if padCount > encGroupSize { 120 return nil, nil, errors.Errorf("invalid marker byte, group bytes %q", groupBytes) 121 } 122 123 realGroupSize := encGroupSize - padCount 124 buf = append(buf, group[:realGroupSize]...) 125 b = b[encGroupSize+1:] 126 127 if padCount != 0 { 128 var padByte = encPad 129 if reverse { 130 padByte = encMarker 131 } 132 // Check validity of padding bytes. 133 for _, v := range group[realGroupSize:] { 134 if v != padByte { 135 return nil, nil, errors.Errorf("invalid padding byte, group bytes %q", groupBytes) 136 } 137 } 138 break 139 } 140 } 141 if reverse { 142 reverseBytes(buf) 143 } 144 return b, buf, nil 145 } 146 147 // DecodeBytes decodes bytes which is encoded by EncodeBytes before, 148 // returns the leftover bytes and decoded value if no error. 149 // `buf` is used to buffer data to avoid the cost of makeslice in decodeBytes when DecodeBytes is called by Decoder.DecodeOne. 
150 func DecodeBytes(b []byte, buf []byte) ([]byte, []byte, error) { 151 return decodeBytes(b, buf, false) 152 } 153 154 // See https://golang.org/src/crypto/cipher/xor.go 155 const wordSize = int(unsafe.Sizeof(uintptr(0))) 156 const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" 157 158 func fastReverseBytes(b []byte) { 159 n := len(b) 160 w := n / wordSize 161 if w > 0 { 162 bw := *(*[]uintptr)(unsafe.Pointer(&b)) 163 for i := 0; i < w; i++ { 164 bw[i] = ^bw[i] 165 } 166 } 167 168 for i := w * wordSize; i < n; i++ { 169 b[i] = ^b[i] 170 } 171 } 172 173 func safeReverseBytes(b []byte) { 174 for i := range b { 175 b[i] = ^b[i] 176 } 177 } 178 179 func reverseBytes(b []byte) { 180 if supportsUnaligned { 181 fastReverseBytes(b) 182 return 183 } 184 185 safeReverseBytes(b) 186 } 187 188 // EncodeIntToCmpUint make int v to comparable uint type 189 func EncodeIntToCmpUint(v int64) uint64 { 190 return uint64(v) ^ signMask 191 } 192 193 // DecodeCmpUintToInt decodes the u that encoded by EncodeIntToCmpUint 194 func DecodeCmpUintToInt(u uint64) int64 { 195 return int64(u ^ signMask) 196 } 197 198 // EncodeInt appends the encoded value to slice b and returns the appended slice. 199 // EncodeInt guarantees that the encoded value is in ascending order for comparison. 200 func EncodeInt(b []byte, v int64) []byte { 201 var data [8]byte 202 u := EncodeIntToCmpUint(v) 203 binary.BigEndian.PutUint64(data[:], u) 204 return append(b, data[:]...) 205 } 206 207 // EncodeIntDesc appends the encoded value to slice b and returns the appended slice. 208 // EncodeIntDesc guarantees that the encoded value is in descending order for comparison. 209 func EncodeIntDesc(b []byte, v int64) []byte { 210 var data [8]byte 211 u := EncodeIntToCmpUint(v) 212 binary.BigEndian.PutUint64(data[:], ^u) 213 return append(b, data[:]...) 214 } 215 216 // DecodeInt decodes value encoded by EncodeInt before. 217 // It returns the leftover un-decoded slice, decoded value if no error. 
218 func DecodeInt(b []byte) ([]byte, int64, error) { 219 if len(b) < 8 { 220 return nil, 0, errors.New("insufficient bytes to decode value") 221 } 222 223 u := binary.BigEndian.Uint64(b[:8]) 224 v := DecodeCmpUintToInt(u) 225 b = b[8:] 226 return b, v, nil 227 } 228 229 // DecodeIntDesc decodes value encoded by EncodeInt before. 230 // It returns the leftover un-decoded slice, decoded value if no error. 231 func DecodeIntDesc(b []byte) ([]byte, int64, error) { 232 if len(b) < 8 { 233 return nil, 0, errors.New("insufficient bytes to decode value") 234 } 235 236 u := binary.BigEndian.Uint64(b[:8]) 237 v := DecodeCmpUintToInt(^u) 238 b = b[8:] 239 return b, v, nil 240 } 241 242 // EncodeUint appends the encoded value to slice b and returns the appended slice. 243 // EncodeUint guarantees that the encoded value is in ascending order for comparison. 244 func EncodeUint(b []byte, v uint64) []byte { 245 var data [8]byte 246 binary.BigEndian.PutUint64(data[:], v) 247 return append(b, data[:]...) 248 } 249 250 // DecodeUint decodes value encoded by EncodeUint before. 251 // It returns the leftover un-decoded slice, decoded value if no error. 252 func DecodeUint(b []byte) ([]byte, uint64, error) { 253 if len(b) < 8 { 254 return nil, 0, errors.New("insufficient bytes to decode value") 255 } 256 257 v := binary.BigEndian.Uint64(b[:8]) 258 b = b[8:] 259 return b, v, nil 260 } 261 262 // DecodeUintDesc decodes value encoded by EncodeInt before. 263 // It returns the leftover un-decoded slice, decoded value if no error. 264 func DecodeUintDesc(b []byte) ([]byte, uint64, error) { 265 if len(b) < 8 { 266 return nil, 0, errors.New("insufficient bytes to decode value") 267 } 268 269 data := b[:8] 270 v := binary.BigEndian.Uint64(data) 271 b = b[8:] 272 return b, ^v, nil 273 } 274 275 // EncodeVarint appends the encoded value to slice b and returns the appended slice. 276 // Note that the encoded result is not memcomparable. 
func EncodeVarint(b []byte, v int64) []byte {
	var scratch [binary.MaxVarintLen64]byte
	used := binary.PutVarint(scratch[:], v)
	return append(b, scratch[:used]...)
}

// DecodeVarint decodes a value encoded by EncodeVarint before.
// It returns the leftover un-decoded slice and the decoded value if no error.
func DecodeVarint(b []byte) ([]byte, int64, error) {
	v, n := binary.Varint(b)
	switch {
	case n > 0:
		return b[n:], v, nil
	case n < 0:
		return nil, 0, errors.New("value larger than 64 bits")
	default:
		return nil, 0, errors.New("insufficient bytes to decode value")
	}
}

// EncodeUvarint appends the encoded value to slice b and returns the appended slice.
// Note that the encoded result is not memcomparable.
func EncodeUvarint(b []byte, v uint64) []byte {
	var scratch [binary.MaxVarintLen64]byte
	used := binary.PutUvarint(scratch[:], v)
	return append(b, scratch[:used]...)
}

// DecodeUvarint decodes a value encoded by EncodeUvarint before.
// It returns the leftover un-decoded slice and the decoded value if no error.
func DecodeUvarint(b []byte) ([]byte, uint64, error) {
	v, n := binary.Uvarint(b)
	switch {
	case n > 0:
		return b[n:], v, nil
	case n < 0:
		return nil, 0, errors.New("value larger than 64 bits")
	default:
		return nil, 0, errors.New("insufficient bytes to decode value")
	}
}

const (
	negativeTagEnd   = 8        // negative tag is (negativeTagEnd - length).
	positiveTagStart = 0xff - 8 // Positive tag is (positiveTagStart + length).
)

// comparableVarintWidth returns how many bytes (1 to 8) are needed to hold u
// without leading zero bytes.
func comparableVarintWidth(u uint64) int {
	width := 1
	for u >>= 8; u != 0; u >>= 8 {
		width++
	}
	return width
}

// appendComparableTagged appends tag followed by the low `width` bytes of u,
// most significant byte first, to b in a single append.
func appendComparableTagged(b []byte, tag byte, u uint64, width int) []byte {
	var enc [9]byte
	enc[0] = tag
	for i := 1; i <= width; i++ {
		enc[i] = byte(u >> (uint(width-i) * 8))
	}
	return append(b, enc[:width+1]...)
}

// EncodeComparableVarint encodes an int64 to mem-comparable bytes, appended
// to b. Non-negative values are encoded exactly as EncodeComparableUvarint does.
func EncodeComparableVarint(b []byte, v int64) []byte {
	if v >= 0 {
		return EncodeComparableUvarint(b, uint64(v))
	}

	// All negative values have a tag byte prefix (negativeTagEnd - length).
	// A smaller negative value encodes to more bytes and so has a smaller tag.
	// The length is the byte width of the magnitude of v; the payload is the
	// low `length` bytes of v's two's-complement form.
	width := comparableVarintWidth(-uint64(v))
	return appendComparableTagged(b, byte(negativeTagEnd-width), uint64(v), width)
}

// EncodeComparableUvarint encodes a uint64 into mem-comparable bytes,
// appended to b.
func EncodeComparableUvarint(b []byte, v uint64) []byte {
	// The first byte has 256 values, [0, 7] is reserved for negative tags,
	// [248, 255] is reserved for larger positive tags,
	// so values [0, 239] are stored in a single byte.
	if v <= positiveTagStart-negativeTagEnd {
		return append(b, byte(v)+negativeTagEnd)
	}

	// Values that cannot be stored in a single byte get a tag byte prefix
	// (positiveTagStart+length). A larger value encodes to more bytes and so
	// has a larger tag.
	width := comparableVarintWidth(v)
	return appendComparableTagged(b, byte(positiveTagStart+width), v, width)
}

var (
	errDecodeInsufficient = errors.New("insufficient bytes to decode value")
	errDecodeInvalid      = errors.New("invalid bytes to decode value")
)

// DecodeComparableUvarint decodes mem-comparable uvarint.
386 func DecodeComparableUvarint(b []byte) ([]byte, uint64, error) { 387 if len(b) == 0 { 388 return nil, 0, errDecodeInsufficient 389 } 390 first := b[0] 391 b = b[1:] 392 if first < negativeTagEnd { 393 return nil, 0, errors.WithStack(errDecodeInvalid) 394 } 395 if first <= positiveTagStart { 396 return b, uint64(first) - negativeTagEnd, nil 397 } 398 length := int(first) - positiveTagStart 399 if len(b) < length { 400 return nil, 0, errors.WithStack(errDecodeInsufficient) 401 } 402 var v uint64 403 for _, c := range b[:length] { 404 v = (v << 8) | uint64(c) 405 } 406 return b[length:], v, nil 407 } 408 409 // DecodeComparableVarint decodes mem-comparable varint. 410 func DecodeComparableVarint(b []byte) ([]byte, int64, error) { 411 if len(b) == 0 { 412 return nil, 0, errors.WithStack(errDecodeInsufficient) 413 } 414 first := b[0] 415 if first >= negativeTagEnd && first <= positiveTagStart { 416 return b, int64(first) - negativeTagEnd, nil 417 } 418 b = b[1:] 419 var length int 420 var v uint64 421 if first < negativeTagEnd { 422 length = negativeTagEnd - int(first) 423 v = math.MaxUint64 // negative value has all bits on by default. 
424 } else { 425 length = int(first) - positiveTagStart 426 } 427 if len(b) < length { 428 return nil, 0, errors.WithStack(errDecodeInsufficient) 429 } 430 for _, c := range b[:length] { 431 v = (v << 8) | uint64(c) 432 } 433 if first > positiveTagStart && v > math.MaxInt64 { 434 return nil, 0, errors.WithStack(errDecodeInvalid) 435 } else if first < negativeTagEnd && v <= math.MaxInt64 { 436 return nil, 0, errors.WithStack(errDecodeInvalid) 437 } 438 return b[length:], int64(v), nil 439 } 440 441 // EncodedBytesLength returns the length of data after encoded 442 func EncodedBytesLength(dataLen int) int { 443 mod := dataLen % encGroupSize 444 padCount := encGroupSize - mod 445 return dataLen + padCount + 1 + dataLen/encGroupSize 446 } 447 448 // EncodeBytesDesc first encodes bytes using EncodeBytes, then bitwise reverses 449 // encoded value to guarantee the encoded value is in descending order for comparison. 450 func EncodeBytesDesc(b []byte, data []byte) []byte { 451 n := len(b) 452 b = EncodeBytes(b, data) 453 reverseBytes(b[n:]) 454 return b 455 } 456 457 // DecodeBytesDesc decodes bytes which is encoded by EncodeBytesDesc before, 458 // returns the leftover bytes and decoded value if no error. 459 func DecodeBytesDesc(b []byte, buf []byte) ([]byte, []byte, error) { 460 return decodeBytes(b, buf, true) 461 } 462 463 // EncodeBytesExt is an extension of `EncodeBytes`, which will not encode for `isRawKv = true` but just append `data` to `b`. 464 func EncodeBytesExt(b []byte, data []byte, isRawKv bool) []byte { 465 if isRawKv { 466 return append(b, data...) 
467 } 468 return EncodeBytes(b, data) 469 } 470 471 func encodeFloatToCmpUint64(f float64) uint64 { 472 u := math.Float64bits(f) 473 if f >= 0 { 474 u |= signMask 475 } else { 476 u = ^u 477 } 478 return u 479 } 480 481 func decodeCmpUintToFloat(u uint64) float64 { 482 if u&signMask > 0 { 483 u &= ^signMask 484 } else { 485 u = ^u 486 } 487 return math.Float64frombits(u) 488 } 489 490 // EncodeFloat encodes a float v into a byte slice which can be sorted lexicographically later. 491 // EncodeFloat guarantees that the encoded value is in ascending order for comparison. 492 func EncodeFloat(b []byte, v float64) []byte { 493 u := encodeFloatToCmpUint64(v) 494 return EncodeUint(b, u) 495 } 496 497 // DecodeFloat decodes a float from a byte slice generated with EncodeFloat before. 498 func DecodeFloat(b []byte) ([]byte, float64, error) { 499 b, u, err := DecodeUint(b) 500 return b, decodeCmpUintToFloat(u), errors.Trace(err) 501 } 502 503 // EncodeFloatDesc encodes a float v into a byte slice which can be sorted lexicographically later. 504 // EncodeFloatDesc guarantees that the encoded value is in descending order for comparison. 505 func EncodeFloatDesc(b []byte, v float64) []byte { 506 u := encodeFloatToCmpUint64(v) 507 return EncodeUintDesc(b, u) 508 } 509 510 // DecodeFloatDesc decodes a float from a byte slice generated with EncodeFloatDesc before. 511 func DecodeFloatDesc(b []byte) ([]byte, float64, error) { 512 b, u, err := DecodeUintDesc(b) 513 return b, decodeCmpUintToFloat(u), errors.Trace(err) 514 }