github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/util/encoding/encoding.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package encoding 12 13 import ( 14 "bytes" 15 "encoding/binary" 16 "encoding/hex" 17 "fmt" 18 "math" 19 "reflect" 20 "strconv" 21 "strings" 22 "time" 23 "unicode" 24 "unicode/utf8" 25 "unsafe" 26 27 "github.com/cockroachdb/apd" 28 "github.com/cockroachdb/cockroach/pkg/geo/geopb" 29 "github.com/cockroachdb/cockroach/pkg/util/bitarray" 30 "github.com/cockroachdb/cockroach/pkg/util/duration" 31 "github.com/cockroachdb/cockroach/pkg/util/ipaddr" 32 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 33 "github.com/cockroachdb/cockroach/pkg/util/timeofday" 34 "github.com/cockroachdb/cockroach/pkg/util/timetz" 35 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 36 "github.com/cockroachdb/cockroach/pkg/util/uuid" 37 "github.com/cockroachdb/errors" 38 ) 39 40 const ( 41 encodedNull = 0x00 42 // A marker greater than NULL but lower than any other value. 43 // This value is not actually ever present in a stored key, but 44 // it's used in keys used as span boundaries for index scans. 45 encodedNotNull = 0x01 46 47 floatNaN = encodedNotNull + 1 48 floatNeg = floatNaN + 1 49 floatZero = floatNeg + 1 50 floatPos = floatZero + 1 51 floatNaNDesc = floatPos + 1 // NaN encoded descendingly 52 53 // The gap between floatNaNDesc and bytesMarker was left for 54 // compatibility reasons. 55 bytesMarker byte = 0x12 56 bytesDescMarker byte = bytesMarker + 1 57 timeMarker byte = bytesDescMarker + 1 58 durationBigNegMarker byte = timeMarker + 1 // Only used for durations < MinInt64 nanos. 
59 durationMarker byte = durationBigNegMarker + 1 60 durationBigPosMarker byte = durationMarker + 1 // Only used for durations > MaxInt64 nanos. 61 62 decimalNaN = durationBigPosMarker + 1 // 24 63 decimalNegativeInfinity = decimalNaN + 1 64 decimalNegLarge = decimalNegativeInfinity + 1 65 decimalNegMedium = decimalNegLarge + 11 66 decimalNegSmall = decimalNegMedium + 1 67 decimalZero = decimalNegSmall + 1 68 decimalPosSmall = decimalZero + 1 69 decimalPosMedium = decimalPosSmall + 1 70 decimalPosLarge = decimalPosMedium + 11 71 decimalInfinity = decimalPosLarge + 1 72 decimalNaNDesc = decimalInfinity + 1 // NaN encoded descendingly 73 decimalTerminator = 0x00 74 75 jsonInvertedIndex = decimalNaNDesc + 1 76 jsonEmptyArray = jsonInvertedIndex + 1 77 jsonEmptyObject = jsonEmptyArray + 1 78 79 bitArrayMarker = jsonEmptyObject + 1 80 bitArrayDescMarker = bitArrayMarker + 1 81 bitArrayDataTerminator = 0x00 82 bitArrayDataDescTerminator = 0xff 83 84 timeTZMarker = bitArrayDescMarker + 1 85 geoMarker = timeTZMarker + 1 86 geoDescMarker = geoMarker + 1 87 88 // Markers and terminators for key encoding Datum arrays in sorted order. 89 // For the arrayKeyMarker and other types like bytes and bit arrays, it 90 // might be unclear why we have a separate marker for the ascending and 91 // descending cases. This is necessary because the terminators for these 92 // encodings are different depending on the direction the data is encoded 93 // in. In order to safely decode a set of bytes without knowing the direction 94 // of the encoding, we must store this information in the marker. Otherwise, 95 // we would not know what terminator to look for when decoding this format. 96 arrayKeyMarker = geoDescMarker + 1 97 arrayKeyDescendingMarker = arrayKeyMarker + 1 98 arrayKeyTerminator byte = 0x00 99 arrayKeyDescendingTerminator byte = 0xFF 100 // We use different null encodings for nulls within key arrays. 
101 // Doing this allows for the terminator to be less/greater than 102 // the null value within arrays. These byte values overlap with 103 // encodedNotNull, encodedNotNullDesc, and interleavedSentinel, 104 // but they can only exist within an encoded array key. Because 105 // of the context, they cannot be ambiguous with these other bytes. 106 ascendingNullWithinArrayKey byte = 0x01 107 descendingNullWithinArrayKey byte = 0xFE 108 109 // IntMin is chosen such that the range of int tags does not overlap the 110 // ascii character set that is frequently used in testing. 111 IntMin = 0x80 // 128 112 intMaxWidth = 8 113 intZero = IntMin + intMaxWidth // 136 114 intSmall = IntMax - intZero - intMaxWidth // 109 115 // IntMax is the maximum int tag value. 116 IntMax = 0xfd // 253 117 118 // Nulls come last when encoded descendingly. 119 // This value is not actually ever present in a stored key, but 120 // it's used in keys used as span boundaries for index scans. 121 encodedNotNullDesc = 0xfe 122 // interleavedSentinel uses the same byte as encodedNotNullDesc. 123 // It is used in the key encoding of interleaved index keys in order 124 // to coerce the key to sort after its respective parent and ancestors' 125 // index keys. 126 // The byte for NotNullDesc was chosen over NullDesc since NotNullDesc 127 // is never used in actual encoded keys. 128 // This allowed the key pretty printer for interleaved keys to work 129 // without table descriptors. 130 interleavedSentinel = 0xfe 131 encodedNullDesc = 0xff 132 133 // offsetSecsToMicros is a constant that allows conversion from seconds 134 // to microseconds for offsetSecs type calculations (e.g. for TimeTZ). 135 offsetSecsToMicros = 1000000 136 ) 137 138 const ( 139 // EncodedDurationMaxLen is the largest number of bytes used when encoding a 140 // Duration. 141 EncodedDurationMaxLen = 1 + 3*binary.MaxVarintLen64 // 3 varints are encoded. 
142 // EncodedTimeTZMaxLen is the largest number of bytes used when encoding a 143 // TimeTZ. 144 EncodedTimeTZMaxLen = 1 + binary.MaxVarintLen64 + binary.MaxVarintLen32 145 ) 146 147 // Direction for ordering results. 148 type Direction int 149 150 // Direction values. 151 const ( 152 _ Direction = iota 153 Ascending 154 Descending 155 ) 156 157 const escapeLength = 2 158 159 // EncodeUint32Ascending encodes the uint32 value using a big-endian 4 byte 160 // representation. The bytes are appended to the supplied buffer and 161 // the final buffer is returned. 162 func EncodeUint32Ascending(b []byte, v uint32) []byte { 163 return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) 164 } 165 166 // PutUint32Ascending encodes the uint32 value using a big-endian 4 byte 167 // representation at the specified index, lengthening the input slice if 168 // necessary. 169 func PutUint32Ascending(b []byte, v uint32, idx int) []byte { 170 for len(b) < idx+4 { 171 b = append(b, 0) 172 } 173 b[idx] = byte(v >> 24) 174 b[idx+1] = byte(v >> 16) 175 b[idx+2] = byte(v >> 8) 176 b[idx+3] = byte(v) 177 return b 178 } 179 180 // EncodeUint32Descending encodes the uint32 value so that it sorts in 181 // reverse order, from largest to smallest. 182 func EncodeUint32Descending(b []byte, v uint32) []byte { 183 return EncodeUint32Ascending(b, ^v) 184 } 185 186 // DecodeUint32Ascending decodes a uint32 from the input buffer, treating 187 // the input as a big-endian 4 byte uint32 representation. The remainder 188 // of the input buffer and the decoded uint32 are returned. 189 func DecodeUint32Ascending(b []byte) ([]byte, uint32, error) { 190 if len(b) < 4 { 191 return nil, 0, errors.Errorf("insufficient bytes to decode uint32 int value") 192 } 193 v := binary.BigEndian.Uint32(b) 194 return b[4:], v, nil 195 } 196 197 // DecodeUint32Descending decodes a uint32 value which was encoded 198 // using EncodeUint32Descending. 
199 func DecodeUint32Descending(b []byte) ([]byte, uint32, error) { 200 leftover, v, err := DecodeUint32Ascending(b) 201 return leftover, ^v, err 202 } 203 204 const uint64AscendingEncodedLength = 8 205 206 // EncodeUint64Ascending encodes the uint64 value using a big-endian 8 byte 207 // representation. The bytes are appended to the supplied buffer and 208 // the final buffer is returned. 209 func EncodeUint64Ascending(b []byte, v uint64) []byte { 210 return append(b, 211 byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), 212 byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) 213 } 214 215 // EncodeUint64Descending encodes the uint64 value so that it sorts in 216 // reverse order, from largest to smallest. 217 func EncodeUint64Descending(b []byte, v uint64) []byte { 218 return EncodeUint64Ascending(b, ^v) 219 } 220 221 // DecodeUint64Ascending decodes a uint64 from the input buffer, treating 222 // the input as a big-endian 8 byte uint64 representation. The remainder 223 // of the input buffer and the decoded uint64 are returned. 224 func DecodeUint64Ascending(b []byte) ([]byte, uint64, error) { 225 if len(b) < 8 { 226 return nil, 0, errors.Errorf("insufficient bytes to decode uint64 int value") 227 } 228 v := binary.BigEndian.Uint64(b) 229 return b[8:], v, nil 230 } 231 232 // DecodeUint64Descending decodes a uint64 value which was encoded 233 // using EncodeUint64Descending. 234 func DecodeUint64Descending(b []byte) ([]byte, uint64, error) { 235 leftover, v, err := DecodeUint64Ascending(b) 236 return leftover, ^v, err 237 } 238 239 const ( 240 maxVarintSize = 9 241 ) 242 243 // EncodeVarintAscending encodes the int64 value using a variable length 244 // (length-prefixed) representation. The length is encoded as a single 245 // byte. If the value to be encoded is negative the length is encoded 246 // as 8-numBytes. If the value is positive it is encoded as 247 // 8+numBytes. 
The encoded bytes are appended to the supplied buffer 248 // and the final buffer is returned. 249 func EncodeVarintAscending(b []byte, v int64) []byte { 250 if v < 0 { 251 switch { 252 case v >= -0xff: 253 return append(b, IntMin+7, byte(v)) 254 case v >= -0xffff: 255 return append(b, IntMin+6, byte(v>>8), byte(v)) 256 case v >= -0xffffff: 257 return append(b, IntMin+5, byte(v>>16), byte(v>>8), byte(v)) 258 case v >= -0xffffffff: 259 return append(b, IntMin+4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) 260 case v >= -0xffffffffff: 261 return append(b, IntMin+3, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), 262 byte(v)) 263 case v >= -0xffffffffffff: 264 return append(b, IntMin+2, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), 265 byte(v>>8), byte(v)) 266 case v >= -0xffffffffffffff: 267 return append(b, IntMin+1, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), 268 byte(v>>16), byte(v>>8), byte(v)) 269 default: 270 return append(b, IntMin, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), 271 byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) 272 } 273 } 274 return EncodeUvarintAscending(b, uint64(v)) 275 } 276 277 // EncodeVarintDescending encodes the int64 value so that it sorts in reverse 278 // order, from largest to smallest. 279 func EncodeVarintDescending(b []byte, v int64) []byte { 280 return EncodeVarintAscending(b, ^v) 281 } 282 283 // getVarintLen returns the encoded length of an encoded varint. Assumes the 284 // slice has at least one byte. 
285 func getVarintLen(b []byte) (int, error) { 286 length := int(b[0]) - intZero 287 if length >= 0 { 288 if length <= intSmall { 289 // just the tag 290 return 1, nil 291 } 292 // tag and length-intSmall bytes 293 length = 1 + length - intSmall 294 } else { 295 // tag and -length bytes 296 length = 1 - length 297 } 298 299 if length > len(b) { 300 return 0, errors.Errorf("varint length %d exceeds slice length %d", length, len(b)) 301 } 302 return length, nil 303 } 304 305 // DecodeVarintAscending decodes a value encoded by EncodeVaringAscending. 306 func DecodeVarintAscending(b []byte) ([]byte, int64, error) { 307 if len(b) == 0 { 308 return nil, 0, errors.Errorf("insufficient bytes to decode uvarint value") 309 } 310 length := int(b[0]) - intZero 311 if length < 0 { 312 length = -length 313 remB := b[1:] 314 if len(remB) < length { 315 return nil, 0, errors.Errorf("insufficient bytes to decode uvarint value: %q", remB) 316 } 317 var v int64 318 // Use the ones-complement of each encoded byte in order to build 319 // up a positive number, then take the ones-complement again to 320 // arrive at our negative value. 321 for _, t := range remB[:length] { 322 v = (v << 8) | int64(^t) 323 } 324 return remB[length:], ^v, nil 325 } 326 327 remB, v, err := DecodeUvarintAscending(b) 328 if err != nil { 329 return remB, 0, err 330 } 331 if v > math.MaxInt64 { 332 return nil, 0, errors.Errorf("varint %d overflows int64", v) 333 } 334 return remB, int64(v), nil 335 } 336 337 // DecodeVarintDescending decodes a uint64 value which was encoded 338 // using EncodeVarintDescending. 339 func DecodeVarintDescending(b []byte) ([]byte, int64, error) { 340 leftover, v, err := DecodeVarintAscending(b) 341 return leftover, ^v, err 342 } 343 344 // EncodeUvarintAscending encodes the uint64 value using a variable length 345 // (length-prefixed) representation. The length is encoded as a single 346 // byte indicating the number of encoded bytes (-8) to follow. 
See 347 // EncodeVarintAscending for rationale. The encoded bytes are appended to the 348 // supplied buffer and the final buffer is returned. 349 func EncodeUvarintAscending(b []byte, v uint64) []byte { 350 switch { 351 case v <= intSmall: 352 return append(b, intZero+byte(v)) 353 case v <= 0xff: 354 return append(b, IntMax-7, byte(v)) 355 case v <= 0xffff: 356 return append(b, IntMax-6, byte(v>>8), byte(v)) 357 case v <= 0xffffff: 358 return append(b, IntMax-5, byte(v>>16), byte(v>>8), byte(v)) 359 case v <= 0xffffffff: 360 return append(b, IntMax-4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) 361 case v <= 0xffffffffff: 362 return append(b, IntMax-3, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), 363 byte(v)) 364 case v <= 0xffffffffffff: 365 return append(b, IntMax-2, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), 366 byte(v>>8), byte(v)) 367 case v <= 0xffffffffffffff: 368 return append(b, IntMax-1, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), 369 byte(v>>16), byte(v>>8), byte(v)) 370 default: 371 return append(b, IntMax, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), 372 byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) 373 } 374 } 375 376 // EncodeUvarintDescending encodes the uint64 value so that it sorts in 377 // reverse order, from largest to smallest. 
378 func EncodeUvarintDescending(b []byte, v uint64) []byte { 379 switch { 380 case v == 0: 381 return append(b, IntMin+8) 382 case v <= 0xff: 383 v = ^v 384 return append(b, IntMin+7, byte(v)) 385 case v <= 0xffff: 386 v = ^v 387 return append(b, IntMin+6, byte(v>>8), byte(v)) 388 case v <= 0xffffff: 389 v = ^v 390 return append(b, IntMin+5, byte(v>>16), byte(v>>8), byte(v)) 391 case v <= 0xffffffff: 392 v = ^v 393 return append(b, IntMin+4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) 394 case v <= 0xffffffffff: 395 v = ^v 396 return append(b, IntMin+3, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), 397 byte(v)) 398 case v <= 0xffffffffffff: 399 v = ^v 400 return append(b, IntMin+2, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), 401 byte(v>>8), byte(v)) 402 case v <= 0xffffffffffffff: 403 v = ^v 404 return append(b, IntMin+1, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), 405 byte(v>>16), byte(v>>8), byte(v)) 406 default: 407 v = ^v 408 return append(b, IntMin, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), 409 byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) 410 } 411 } 412 413 // highestByteIndex returns the index (0 to 7) of the highest nonzero byte in v. 414 func highestByteIndex(v uint64) int { 415 l := 0 416 if v > 0xffffffff { 417 v >>= 32 418 l += 4 419 } 420 if v > 0xffff { 421 v >>= 16 422 l += 2 423 } 424 if v > 0xff { 425 l++ 426 } 427 return l 428 } 429 430 // EncLenUvarintAscending returns the encoding length for EncodeUvarintAscending 431 // without actually encoding. 432 func EncLenUvarintAscending(v uint64) int { 433 if v <= intSmall { 434 return 1 435 } 436 return 2 + highestByteIndex(v) 437 } 438 439 // EncLenUvarintDescending returns the encoding length for 440 // EncodeUvarintDescending without actually encoding. 
441 func EncLenUvarintDescending(v uint64) int { 442 if v == 0 { 443 return 1 444 } 445 return 2 + highestByteIndex(v) 446 } 447 448 // DecodeUvarintAscending decodes a varint encoded uint64 from the input 449 // buffer. The remainder of the input buffer and the decoded uint64 450 // are returned. 451 func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { 452 if len(b) == 0 { 453 return nil, 0, errors.Errorf("insufficient bytes to decode uvarint value") 454 } 455 length := int(b[0]) - intZero 456 b = b[1:] // skip length byte 457 if length <= intSmall { 458 return b, uint64(length), nil 459 } 460 length -= intSmall 461 if length < 0 || length > 8 { 462 return nil, 0, errors.Errorf("invalid uvarint length of %d", length) 463 } else if len(b) < length { 464 return nil, 0, errors.Errorf("insufficient bytes to decode uvarint value: %q", b) 465 } 466 var v uint64 467 // It is faster to range over the elements in a slice than to index 468 // into the slice on each loop iteration. 469 for _, t := range b[:length] { 470 v = (v << 8) | uint64(t) 471 } 472 return b[length:], v, nil 473 } 474 475 // DecodeUvarintDescending decodes a uint64 value which was encoded 476 // using EncodeUvarintDescending. 
477 func DecodeUvarintDescending(b []byte) ([]byte, uint64, error) { 478 if len(b) == 0 { 479 return nil, 0, errors.Errorf("insufficient bytes to decode uvarint value") 480 } 481 length := intZero - int(b[0]) 482 b = b[1:] // skip length byte 483 if length < 0 || length > 8 { 484 return nil, 0, errors.Errorf("invalid uvarint length of %d", length) 485 } else if len(b) < length { 486 return nil, 0, errors.Errorf("insufficient bytes to decode uvarint value: %q", b) 487 } 488 var x uint64 489 for _, t := range b[:length] { 490 x = (x << 8) | uint64(^t) 491 } 492 return b[length:], x, nil 493 } 494 495 const ( 496 // <term> -> \x00\x01 497 // \x00 -> \x00\xff 498 escape byte = 0x00 499 escapedTerm byte = 0x01 500 escapedJSONObjectKeyTerm byte = 0x02 501 escapedJSONArray byte = 0x03 502 escaped00 byte = 0xff 503 escapedFF byte = 0x00 504 ) 505 506 type escapes struct { 507 escape byte 508 escapedTerm byte 509 escaped00 byte 510 escapedFF byte 511 marker byte 512 } 513 514 var ( 515 ascendingBytesEscapes = escapes{escape, escapedTerm, escaped00, escapedFF, bytesMarker} 516 descendingBytesEscapes = escapes{^escape, ^escapedTerm, ^escaped00, ^escapedFF, bytesDescMarker} 517 518 ascendingGeoEscapes = escapes{escape, escapedTerm, escaped00, escapedFF, geoMarker} 519 descendingGeoEscapes = escapes{^escape, ^escapedTerm, ^escaped00, ^escapedFF, geoDescMarker} 520 ) 521 522 // EncodeBytesAscending encodes the []byte value using an escape-based 523 // encoding. The encoded value is terminated with the sequence 524 // "\x00\x01" which is guaranteed to not occur elsewhere in the 525 // encoded value. The encoded bytes are append to the supplied buffer 526 // and the resulting buffer is returned. 
527 func EncodeBytesAscending(b []byte, data []byte) []byte { 528 return encodeBytesAscendingWithTerminatorAndPrefix(b, data, ascendingBytesEscapes.escapedTerm, bytesMarker) 529 } 530 531 // encodeBytesAscendingWithTerminatorAndPrefix encodes the []byte value using an escape-based 532 // encoding. The encoded value is terminated with the sequence 533 // "\x00\terminator". The encoded bytes are append to the supplied buffer 534 // and the resulting buffer is returned. The terminator allows us to pass 535 // different terminators for things such as JSON key encoding. 536 func encodeBytesAscendingWithTerminatorAndPrefix( 537 b []byte, data []byte, terminator byte, prefix byte, 538 ) []byte { 539 b = append(b, prefix) 540 return encodeBytesAscendingWithTerminator(b, data, terminator) 541 } 542 543 // encodeBytesAscendingWithTerminator encodes the []byte value using an escape-based 544 // encoding. The encoded value is terminated with the sequence 545 // "\x00\terminator". The encoded bytes are append to the supplied buffer 546 // and the resulting buffer is returned. The terminator allows us to pass 547 // different terminators for things such as JSON key encoding. 548 func encodeBytesAscendingWithTerminator(b []byte, data []byte, terminator byte) []byte { 549 bs := encodeBytesAscendingWithoutTerminatorOrPrefix(b, data) 550 return append(bs, escape, terminator) 551 } 552 553 // encodeBytesAscendingWithoutTerminatorOrPrefix encodes the []byte value using an escape-based 554 // encoding. 555 func encodeBytesAscendingWithoutTerminatorOrPrefix(b []byte, data []byte) []byte { 556 for { 557 // IndexByte is implemented by the go runtime in assembly and is 558 // much faster than looping over the bytes in the slice. 559 i := bytes.IndexByte(data, escape) 560 if i == -1 { 561 break 562 } 563 b = append(b, data[:i]...) 564 b = append(b, escape, escaped00) 565 data = data[i+1:] 566 } 567 return append(b, data...) 
568 } 569 570 // EncodeBytesDescending encodes the []byte value using an 571 // escape-based encoding and then inverts (ones complement) the result 572 // so that it sorts in reverse order, from larger to smaller 573 // lexicographically. 574 func EncodeBytesDescending(b []byte, data []byte) []byte { 575 n := len(b) 576 b = EncodeBytesAscending(b, data) 577 b[n] = bytesDescMarker 578 onesComplement(b[n+1:]) 579 return b 580 } 581 582 // DecodeBytesAscending decodes a []byte value from the input buffer 583 // which was encoded using EncodeBytesAscending. The decoded bytes 584 // are appended to r. The remainder of the input buffer and the 585 // decoded []byte are returned. 586 func DecodeBytesAscending(b []byte, r []byte) ([]byte, []byte, error) { 587 return decodeBytesInternal(b, r, ascendingBytesEscapes, true) 588 } 589 590 // DecodeBytesDescending decodes a []byte value from the input buffer 591 // which was encoded using EncodeBytesDescending. The decoded bytes 592 // are appended to r. The remainder of the input buffer and the 593 // decoded []byte are returned. 594 func DecodeBytesDescending(b []byte, r []byte) ([]byte, []byte, error) { 595 // Always pass an `r` to make sure we never get back a sub-slice of `b`, 596 // since we're going to modify the contents of the slice. 
597 if r == nil { 598 r = []byte{} 599 } 600 b, r, err := decodeBytesInternal(b, r, descendingBytesEscapes, true) 601 onesComplement(r) 602 return b, r, err 603 } 604 605 func decodeBytesInternal(b []byte, r []byte, e escapes, expectMarker bool) ([]byte, []byte, error) { 606 if expectMarker { 607 if len(b) == 0 || b[0] != e.marker { 608 return nil, nil, errors.Errorf("did not find marker %#x in buffer %#x", e.marker, b) 609 } 610 b = b[1:] 611 } 612 613 for { 614 i := bytes.IndexByte(b, e.escape) 615 if i == -1 { 616 return nil, nil, errors.Errorf("did not find terminator %#x in buffer %#x", e.escape, b) 617 } 618 if i+1 >= len(b) { 619 return nil, nil, errors.Errorf("malformed escape in buffer %#x", b) 620 } 621 v := b[i+1] 622 if v == e.escapedTerm { 623 if r == nil { 624 r = b[:i] 625 } else { 626 r = append(r, b[:i]...) 627 } 628 return b[i+2:], r, nil 629 } 630 631 if v != e.escaped00 { 632 return nil, nil, errors.Errorf("unknown escape sequence: %#x %#x", e.escape, v) 633 } 634 635 r = append(r, b[:i]...) 636 r = append(r, e.escapedFF) 637 b = b[i+2:] 638 } 639 } 640 641 // getBytesLength finds the length of a bytes encoding. 642 func getBytesLength(b []byte, e escapes) (int, error) { 643 // Skip the tag. 644 skipped := 1 645 for { 646 i := bytes.IndexByte(b[skipped:], e.escape) 647 if i == -1 { 648 return 0, errors.Errorf("did not find terminator %#x in buffer %#x", e.escape, b) 649 } 650 if i+1 >= len(b) { 651 return 0, errors.Errorf("malformed escape in buffer %#x", b) 652 } 653 skipped += i + escapeLength 654 if b[skipped-1] == e.escapedTerm { 655 return skipped, nil 656 } 657 } 658 } 659 660 // prettyPrintInvertedIndexKey returns a string representation of the path part of a JSON inverted 661 // index. 662 func prettyPrintInvertedIndexKey(b []byte) (string, []byte, error) { 663 outBytes := "" 664 // We're skipping the first byte because it's the JSON tag. 
665 tempB := b[1:] 666 for { 667 i := bytes.IndexByte(tempB, escape) 668 669 if i == -1 { 670 return "", nil, errors.Errorf("did not find terminator %#x in buffer %#x", escape, b) 671 } 672 if i+1 >= len(tempB) { 673 return "", nil, errors.Errorf("malformed escape in buffer %#x", b) 674 } 675 676 switch tempB[i+1] { 677 case escapedTerm: 678 if len(tempB[:i]) > 0 { 679 outBytes = outBytes + strconv.Quote(unsafeString(tempB[:i])) 680 } else { 681 lenOut := len(outBytes) 682 if lenOut > 1 && outBytes[lenOut-1] == '/' { 683 outBytes = outBytes[:lenOut-1] 684 } 685 } 686 return outBytes, tempB[i+escapeLength:], nil 687 case escapedJSONObjectKeyTerm: 688 outBytes = outBytes + strconv.Quote(unsafeString(tempB[:i])) + "/" 689 case escapedJSONArray: 690 outBytes = outBytes + "Arr/" 691 default: 692 return "", nil, errors.Errorf("malformed escape in buffer %#x", b) 693 694 } 695 696 tempB = tempB[i+escapeLength:] 697 } 698 } 699 700 // UnsafeConvertStringToBytes converts a string to a byte array to be used with 701 // string encoding functions. Note that the output byte array should not be 702 // modified if the input string is expected to be used again - doing so could 703 // violate Go semantics. 704 func UnsafeConvertStringToBytes(s string) []byte { 705 if len(s) == 0 { 706 return nil 707 } 708 // We unsafely convert the string to a []byte to avoid the 709 // usual allocation when converting to a []byte. This is 710 // kosher because we know that EncodeBytes{,Descending} does 711 // not keep a reference to the value it encodes. The first 712 // step is getting access to the string internals. 713 hdr := (*reflect.StringHeader)(unsafe.Pointer(&s)) 714 // Next we treat the string data as a maximally sized array which we 715 // slice. This usage is safe because the pointer value remains in the string. 716 return (*[0x7fffffff]byte)(unsafe.Pointer(hdr.Data))[:len(s):len(s)] 717 } 718 719 // EncodeStringAscending encodes the string value using an escape-based encoding. 
See 720 // EncodeBytes for details. The encoded bytes are append to the supplied buffer 721 // and the resulting buffer is returned. 722 func EncodeStringAscending(b []byte, s string) []byte { 723 return encodeStringAscendingWithTerminatorAndPrefix(b, s, ascendingBytesEscapes.escapedTerm, bytesMarker) 724 } 725 726 // encodeStringAscendingWithTerminatorAndPrefix encodes the string value using an escape-based encoding. See 727 // EncodeBytes for details. The encoded bytes are append to the supplied buffer 728 // and the resulting buffer is returned. We can also pass a terminator byte to be used with 729 // JSON key encoding. 730 func encodeStringAscendingWithTerminatorAndPrefix( 731 b []byte, s string, terminator byte, prefix byte, 732 ) []byte { 733 unsafeString := UnsafeConvertStringToBytes(s) 734 return encodeBytesAscendingWithTerminatorAndPrefix(b, unsafeString, terminator, prefix) 735 } 736 737 // EncodeJSONKeyStringAscending encodes the JSON key string value with a JSON specific escaped 738 // terminator. This allows us to encode keys in the same number of bytes as a string, 739 // while at the same time giving us a sentinel to identify JSON keys. The end parameter is used 740 // to determine if this is the last key in a a JSON path. If it is we don't add a separator after it. 741 func EncodeJSONKeyStringAscending(b []byte, s string, end bool) []byte { 742 str := UnsafeConvertStringToBytes(s) 743 744 if end { 745 return encodeBytesAscendingWithoutTerminatorOrPrefix(b, str) 746 } 747 return encodeBytesAscendingWithTerminator(b, str, escapedJSONObjectKeyTerm) 748 } 749 750 // EncodeJSONEmptyArray returns a byte array b with a byte to signify an empty JSON array. 751 func EncodeJSONEmptyArray(b []byte) []byte { 752 return append(b, escape, escapedTerm, jsonEmptyArray) 753 } 754 755 // AddJSONPathTerminator adds a json path terminator to a byte array. 
756 func AddJSONPathTerminator(b []byte) []byte { 757 return append(b, escape, escapedTerm) 758 } 759 760 // EncodeJSONEmptyObject returns a byte array b with a byte to signify an empty JSON object. 761 func EncodeJSONEmptyObject(b []byte) []byte { 762 return append(b, escape, escapedTerm, jsonEmptyObject) 763 } 764 765 // EncodeStringDescending is the descending version of EncodeStringAscending. 766 func EncodeStringDescending(b []byte, s string) []byte { 767 if len(s) == 0 { 768 return EncodeBytesDescending(b, nil) 769 } 770 // We unsafely convert the string to a []byte to avoid the 771 // usual allocation when converting to a []byte. This is 772 // kosher because we know that EncodeBytes{,Descending} does 773 // not keep a reference to the value it encodes. The first 774 // step is getting access to the string internals. 775 hdr := (*reflect.StringHeader)(unsafe.Pointer(&s)) 776 // Next we treat the string data as a maximally sized array which we 777 // slice. This usage is safe because the pointer value remains in the string. 778 arg := (*[0x7fffffff]byte)(unsafe.Pointer(hdr.Data))[:len(s):len(s)] 779 return EncodeBytesDescending(b, arg) 780 } 781 782 // unsafeString performs an unsafe conversion from a []byte to a string. The 783 // returned string will share the underlying memory with the []byte which thus 784 // allows the string to be mutable through the []byte. We're careful to use 785 // this method only in situations in which the []byte will not be modified. 786 func unsafeString(b []byte) string { 787 return *(*string)(unsafe.Pointer(&b)) 788 } 789 790 // DecodeUnsafeStringAscending decodes a string value from the input buffer which was 791 // encoded using EncodeString or EncodeBytes. The r []byte is used as a 792 // temporary buffer in order to avoid memory allocations. The remainder of the 793 // input buffer and the decoded string are returned. Note that the returned 794 // string may share storage with the input buffer. 
795 func DecodeUnsafeStringAscending(b []byte, r []byte) ([]byte, string, error) { 796 b, r, err := DecodeBytesAscending(b, r) 797 return b, unsafeString(r), err 798 } 799 800 // DecodeUnsafeStringDescending decodes a string value from the input buffer which 801 // was encoded using EncodeStringDescending or EncodeBytesDescending. The r 802 // []byte is used as a temporary buffer in order to avoid memory 803 // allocations. The remainder of the input buffer and the decoded string are 804 // returned. Note that the returned string may share storage with the input 805 // buffer. 806 func DecodeUnsafeStringDescending(b []byte, r []byte) ([]byte, string, error) { 807 b, r, err := DecodeBytesDescending(b, r) 808 return b, unsafeString(r), err 809 } 810 811 // EncodeNullAscending encodes a NULL value. The encodes bytes are appended to the 812 // supplied buffer and the final buffer is returned. The encoded value for a 813 // NULL is guaranteed to not be a prefix for the EncodeVarint, EncodeFloat, 814 // EncodeBytes and EncodeString encodings. 815 func EncodeNullAscending(b []byte) []byte { 816 return append(b, encodedNull) 817 } 818 819 // EncodeJSONAscending encodes a JSON Type. The encoded bytes are appended to the 820 // supplied buffer and the final buffer is returned. 821 func EncodeJSONAscending(b []byte) []byte { 822 return append(b, jsonInvertedIndex) 823 } 824 825 // EncodeNullDescending is the descending equivalent of EncodeNullAscending. 826 func EncodeNullDescending(b []byte) []byte { 827 return append(b, encodedNullDesc) 828 } 829 830 // EncodeNotNullAscending encodes a value that is larger than the NULL marker encoded by 831 // EncodeNull but less than any encoded value returned by EncodeVarint, 832 // EncodeFloat, EncodeBytes or EncodeString. 833 func EncodeNotNullAscending(b []byte) []byte { 834 return append(b, encodedNotNull) 835 } 836 837 // EncodeArrayAscending encodes a value used to signify membership of an array for JSON objects. 
838 func EncodeArrayAscending(b []byte) []byte { 839 return append(b, escape, escapedJSONArray) 840 } 841 842 // EncodeTrueAscending encodes the boolean value true for use with JSON inverted indexes. 843 func EncodeTrueAscending(b []byte) []byte { 844 return append(b, byte(True)) 845 } 846 847 // EncodeFalseAscending encodes the boolean value false for use with JSON inverted indexes. 848 func EncodeFalseAscending(b []byte) []byte { 849 return append(b, byte(False)) 850 } 851 852 // EncodeNotNullDescending is the descending equivalent of EncodeNotNullAscending. 853 func EncodeNotNullDescending(b []byte) []byte { 854 return append(b, encodedNotNullDesc) 855 } 856 857 // EncodeInterleavedSentinel encodes an interleavedSentinel that is necessary 858 // for interleaved indexes and their index keys. 859 // The interleavedSentinel has a byte value 0xfe and is equivalent to 860 // encodedNotNullDesc. 861 func EncodeInterleavedSentinel(b []byte) []byte { 862 return append(b, interleavedSentinel) 863 } 864 865 // DecodeIfNull decodes a NULL value from the input buffer. If the input buffer 866 // contains a null at the start of the buffer then it is removed from the 867 // buffer and true is returned for the second result. Otherwise, the buffer is 868 // returned unchanged and false is returned for the second result. Since the 869 // NULL value encoding is guaranteed to never occur as the prefix for the 870 // EncodeVarint, EncodeFloat, EncodeBytes and EncodeString encodings, it is 871 // safe to call DecodeIfNull on their encoded values. 872 // This function handles both ascendingly and descendingly encoded NULLs. 873 func DecodeIfNull(b []byte) ([]byte, bool) { 874 if PeekType(b) == Null { 875 return b[1:], true 876 } 877 return b, false 878 } 879 880 // DecodeIfNotNull decodes a not-NULL value from the input buffer. 
If the input 881 // buffer contains a not-NULL marker at the start of the buffer then it is 882 // removed from the buffer and true is returned for the second 883 // result. Otherwise, the buffer is returned unchanged and false is returned 884 // for the second result. Note that the not-NULL marker is identical to the 885 // empty string encoding, so do not use this routine where it is necessary to 886 // distinguish not-NULL from the empty string. 887 // This function handles both ascendingly and descendingly encoded NULLs. 888 func DecodeIfNotNull(b []byte) ([]byte, bool) { 889 if PeekType(b) == NotNull { 890 return b[1:], true 891 } 892 return b, false 893 } 894 895 // DecodeIfNotNullDescending decodes encodedNotNullDesc from the input buffer 896 // and returns the remaining buffer without the sentinel if encodedNotNullDesc 897 // is the first byte. 898 // Otherwise, the buffer is returned unchanged and false is returned. 899 func DecodeIfNotNullDescending(b []byte) ([]byte, bool) { 900 if len(b) == 0 { 901 return b, false 902 } 903 904 if b[0] == encodedNotNullDesc { 905 return b[1:], true 906 } 907 908 return b, false 909 } 910 911 // DecodeIfInterleavedSentinel decodes the interleavedSentinel from the input 912 // buffer and returns the remaining buffer without the sentinel if the 913 // interleavedSentinel is the first byte. 914 // Otherwise, the buffer is returned unchanged and false is returned. 915 func DecodeIfInterleavedSentinel(b []byte) ([]byte, bool) { 916 // The interleavedSentinel is equivalent to encodedNotNullDesc 917 return DecodeIfNotNullDescending(b) 918 } 919 920 // EncodeTimeAscending encodes a time value, appends it to the supplied buffer, 921 // and returns the final buffer. The encoding is guaranteed to be ordered 922 // Such that if t1.Before(t2) then after EncodeTime(b1, t1), and 923 // EncodeTime(b2, t2), Compare(b1, b2) < 0. The time zone offset not 924 // included in the encoding. 
925 func EncodeTimeAscending(b []byte, t time.Time) []byte { 926 return encodeTime(b, t.Unix(), int64(t.Nanosecond())) 927 } 928 929 // EncodeTimeDescending is the descending version of EncodeTimeAscending. 930 func EncodeTimeDescending(b []byte, t time.Time) []byte { 931 return encodeTime(b, ^t.Unix(), ^int64(t.Nanosecond())) 932 } 933 934 func encodeTime(b []byte, unix, nanos int64) []byte { 935 // Read the unix absolute time. This is the absolute time and is 936 // not time zone offset dependent. 937 b = append(b, timeMarker) 938 b = EncodeVarintAscending(b, unix) 939 b = EncodeVarintAscending(b, nanos) 940 return b 941 } 942 943 // DecodeTimeAscending decodes a time.Time value which was encoded using 944 // EncodeTime. The remainder of the input buffer and the decoded 945 // time.Time are returned. 946 func DecodeTimeAscending(b []byte) ([]byte, time.Time, error) { 947 b, sec, nsec, err := decodeTime(b) 948 if err != nil { 949 return b, time.Time{}, err 950 } 951 return b, timeutil.Unix(sec, nsec), nil 952 } 953 954 // DecodeTimeDescending is the descending version of DecodeTimeAscending. 955 func DecodeTimeDescending(b []byte) ([]byte, time.Time, error) { 956 b, sec, nsec, err := decodeTime(b) 957 if err != nil { 958 return b, time.Time{}, err 959 } 960 return b, timeutil.Unix(^sec, ^nsec), nil 961 } 962 963 func decodeTime(b []byte) (r []byte, sec int64, nsec int64, err error) { 964 if PeekType(b) != Time { 965 return nil, 0, 0, errors.Errorf("did not find marker") 966 } 967 b = b[1:] 968 b, sec, err = DecodeVarintAscending(b) 969 if err != nil { 970 return b, 0, 0, err 971 } 972 b, nsec, err = DecodeVarintAscending(b) 973 if err != nil { 974 return b, 0, 0, err 975 } 976 return b, sec, nsec, nil 977 } 978 979 // EncodeGeoAscending encodes a geopb.SpatialObject value in ascending order and 980 // returns the new buffer. 981 // TODO(otan): this should ideally just be encoded by {SRID,Shape,Raw Points}. 982 // EWKB is expensive to encode. 
However, we don't store this as a PRIMARY KEY 983 // (this is needed for GROUP BY only for now), so we ignore it for now. 984 func EncodeGeoAscending(b []byte, g *geopb.SpatialObject) ([]byte, error) { 985 data, err := protoutil.Marshal(g) 986 if err != nil { 987 return nil, err 988 } 989 b = encodeBytesAscendingWithTerminatorAndPrefix(b, data, ascendingGeoEscapes.escapedTerm, geoMarker) 990 return b, nil 991 } 992 993 // EncodeGeoDescending encodes a geopb.SpatialObject value in descending order and 994 // returns the new buffer. 995 func EncodeGeoDescending(b []byte, g *geopb.SpatialObject) ([]byte, error) { 996 n := len(b) 997 var err error 998 b, err = EncodeGeoAscending(b, g) 999 if err != nil { 1000 return nil, err 1001 } 1002 b[n] = geoDescMarker 1003 onesComplement(b[n+1:]) 1004 return b, nil 1005 } 1006 1007 // DecodeGeoAscending decodes a geopb.SpatialObject value that was encoded 1008 // in ascending order back into a geopb.SpatialObject. 1009 func DecodeGeoAscending(b []byte) ([]byte, geopb.SpatialObject, error) { 1010 var pbBytes []byte 1011 var ret geopb.SpatialObject 1012 var err error 1013 b, pbBytes, err = decodeBytesInternal(b, pbBytes, ascendingGeoEscapes, true) 1014 if err != nil { 1015 return b, ret, err 1016 } 1017 err = protoutil.Unmarshal(pbBytes, &ret) 1018 return b, ret, err 1019 } 1020 1021 // DecodeGeoDescending decodes a geopb.SpatialObject value that was encoded 1022 // in descending order back into a geopb.SpatialObject. 
1023 func DecodeGeoDescending(b []byte) ([]byte, geopb.SpatialObject, error) { 1024 var pbBytes []byte 1025 var ret geopb.SpatialObject 1026 var err error 1027 b, pbBytes, err = decodeBytesInternal(b, pbBytes, descendingGeoEscapes, true) 1028 if err != nil { 1029 return b, ret, err 1030 } 1031 onesComplement(pbBytes) 1032 err = protoutil.Unmarshal(pbBytes, &ret) 1033 return b, ret, err 1034 } 1035 1036 // EncodeTimeTZAscending encodes a timetz.TimeTZ value and appends it to 1037 // the supplied buffer and returns the final buffer. 1038 // The encoding is guaranteed to be ordered such that if t1.Before(t2) 1039 // then after encodeTimeTZ(b1, t1) and encodeTimeTZ(b2, t2), 1040 // Compare(b1, b2) < 0. 1041 // The time zone offset is included in the encoding. 1042 func EncodeTimeTZAscending(b []byte, t timetz.TimeTZ) []byte { 1043 // Do not use TimeOfDay's add function, as it loses 24:00:00 encoding. 1044 return encodeTimeTZ(b, int64(t.TimeOfDay)+int64(t.OffsetSecs)*offsetSecsToMicros, t.OffsetSecs) 1045 } 1046 1047 // EncodeTimeTZDescending is the descending version of EncodeTimeTZAscending. 1048 func EncodeTimeTZDescending(b []byte, t timetz.TimeTZ) []byte { 1049 // Do not use TimeOfDay's add function, as it loses 24:00:00 encoding. 1050 return encodeTimeTZ(b, ^(int64(t.TimeOfDay) + int64(t.OffsetSecs)*offsetSecsToMicros), ^t.OffsetSecs) 1051 } 1052 1053 func encodeTimeTZ(b []byte, unixMicros int64, offsetSecs int32) []byte { 1054 b = append(b, timeTZMarker) 1055 b = EncodeVarintAscending(b, unixMicros) 1056 b = EncodeVarintAscending(b, int64(offsetSecs)) 1057 return b 1058 } 1059 1060 // DecodeTimeTZAscending decodes a timetz.TimeTZ value which was encoded 1061 // using encodeTimeTZ. The remainder of the input buffer and the decoded 1062 // timetz.TimeTZ are returned. 
1063 func DecodeTimeTZAscending(b []byte) ([]byte, timetz.TimeTZ, error) { 1064 b, unixMicros, offsetSecs, err := decodeTimeTZ(b) 1065 if err != nil { 1066 return nil, timetz.TimeTZ{}, err 1067 } 1068 // Do not use timeofday.FromInt, as it loses 24:00:00 encoding. 1069 return b, timetz.TimeTZ{ 1070 TimeOfDay: timeofday.TimeOfDay(unixMicros - int64(offsetSecs)*offsetSecsToMicros), 1071 OffsetSecs: offsetSecs, 1072 }, nil 1073 } 1074 1075 // DecodeTimeTZDescending is the descending version of DecodeTimeTZAscending. 1076 func DecodeTimeTZDescending(b []byte) ([]byte, timetz.TimeTZ, error) { 1077 b, unixMicros, offsetSecs, err := decodeTimeTZ(b) 1078 if err != nil { 1079 return nil, timetz.TimeTZ{}, err 1080 } 1081 // Do not use timeofday.FromInt, as it loses 24:00:00 encoding. 1082 return b, timetz.TimeTZ{ 1083 TimeOfDay: timeofday.TimeOfDay(^unixMicros - int64(^offsetSecs)*offsetSecsToMicros), 1084 OffsetSecs: ^offsetSecs, 1085 }, nil 1086 } 1087 1088 func decodeTimeTZ(b []byte) ([]byte, int64, int32, error) { 1089 if PeekType(b) != TimeTZ { 1090 return nil, 0, 0, errors.Errorf("did not find marker") 1091 } 1092 b = b[1:] 1093 var err error 1094 var unixMicros int64 1095 b, unixMicros, err = DecodeVarintAscending(b) 1096 if err != nil { 1097 return nil, 0, 0, err 1098 } 1099 var offsetSecs int64 1100 b, offsetSecs, err = DecodeVarintAscending(b) 1101 if err != nil { 1102 return nil, 0, 0, err 1103 } 1104 return b, unixMicros, int32(offsetSecs), nil 1105 } 1106 1107 // EncodeDurationAscending encodes a duration.Duration value, appends it to the 1108 // supplied buffer, and returns the final buffer. The encoding is guaranteed to 1109 // be ordered such that if t1.Compare(t2) < 0 (or = 0 or > 0) then bytes.Compare 1110 // will order them the same way after encoding. 
1111 func EncodeDurationAscending(b []byte, d duration.Duration) ([]byte, error) { 1112 sortNanos, months, days, err := d.Encode() 1113 if err != nil { 1114 // TODO(dan): Handle this using d.EncodeBigInt() and the 1115 // durationBigNeg/durationBigPos markers. 1116 return b, err 1117 } 1118 b = append(b, durationMarker) 1119 b = EncodeVarintAscending(b, sortNanos) 1120 b = EncodeVarintAscending(b, months) 1121 b = EncodeVarintAscending(b, days) 1122 return b, nil 1123 } 1124 1125 // EncodeDurationDescending is the descending version of EncodeDurationAscending. 1126 func EncodeDurationDescending(b []byte, d duration.Duration) ([]byte, error) { 1127 sortNanos, months, days, err := d.Encode() 1128 if err != nil { 1129 // TODO(dan): Handle this using d.EncodeBigInt() and the 1130 // durationBigNeg/durationBigPos markers. 1131 return b, err 1132 } 1133 b = append(b, durationMarker) 1134 b = EncodeVarintDescending(b, sortNanos) 1135 b = EncodeVarintDescending(b, months) 1136 b = EncodeVarintDescending(b, days) 1137 return b, nil 1138 } 1139 1140 // DecodeDurationAscending decodes a duration.Duration value which was encoded 1141 // using EncodeDurationAscending. The remainder of the input buffer and the 1142 // decoded duration.Duration are returned. 
1143 func DecodeDurationAscending(b []byte) ([]byte, duration.Duration, error) { 1144 if PeekType(b) != Duration { 1145 return nil, duration.Duration{}, errors.Errorf("did not find marker %x", b) 1146 } 1147 b = b[1:] 1148 b, sortNanos, err := DecodeVarintAscending(b) 1149 if err != nil { 1150 return b, duration.Duration{}, err 1151 } 1152 b, months, err := DecodeVarintAscending(b) 1153 if err != nil { 1154 return b, duration.Duration{}, err 1155 } 1156 b, days, err := DecodeVarintAscending(b) 1157 if err != nil { 1158 return b, duration.Duration{}, err 1159 } 1160 d, err := duration.Decode(sortNanos, months, days) 1161 if err != nil { 1162 return b, duration.Duration{}, err 1163 } 1164 return b, d, nil 1165 } 1166 1167 // DecodeDurationDescending is the descending version of DecodeDurationAscending. 1168 func DecodeDurationDescending(b []byte) ([]byte, duration.Duration, error) { 1169 if PeekType(b) != Duration { 1170 return nil, duration.Duration{}, errors.Errorf("did not find marker") 1171 } 1172 b = b[1:] 1173 b, sortNanos, err := DecodeVarintDescending(b) 1174 if err != nil { 1175 return b, duration.Duration{}, err 1176 } 1177 b, months, err := DecodeVarintDescending(b) 1178 if err != nil { 1179 return b, duration.Duration{}, err 1180 } 1181 b, days, err := DecodeVarintDescending(b) 1182 if err != nil { 1183 return b, duration.Duration{}, err 1184 } 1185 d, err := duration.Decode(sortNanos, months, days) 1186 if err != nil { 1187 return b, duration.Duration{}, err 1188 } 1189 return b, d, nil 1190 } 1191 1192 // EncodeBitArrayAscending encodes a bitarray.BitArray value, appends it to the 1193 // supplied buffer, and returns the final buffer. The encoding is guaranteed to 1194 // be ordered such that if t1.Compare(t2) < 0 (or = 0 or > 0) then bytes.Compare 1195 // will order them the same way after encoding. 1196 // 1197 // The encoding uses varint encoding for each word of the backing 1198 // array. This is a trade-off. 
The alternative is to encode the entire 1199 // backing word array as a byte array, using byte array encoding and escaped 1200 // special bytes (via `encodeBytesAscendingWithoutTerminatorOrPrefix`). 1201 // There are two arguments against this alternative: 1202 // - the bytes must be encoded big endian, but the most common architectures 1203 // running CockroachDB are little-endian, so the bytes would need 1204 // to be reordered prior to encoding. 1205 // - when decoding or skipping over a value, the decoding/sizing loop 1206 // would need to look at every byte of the encoding to find the 1207 // terminator. 1208 // In contrast, the chosen encoding using varints is endianness-agnostic 1209 // and enables fast decoding/skipping thanks ot the tag bytes. 1210 func EncodeBitArrayAscending(b []byte, d bitarray.BitArray) []byte { 1211 b = append(b, bitArrayMarker) 1212 words, lastBitsUsed := d.EncodingParts() 1213 for _, w := range words { 1214 b = EncodeUvarintAscending(b, w) 1215 } 1216 b = append(b, bitArrayDataTerminator) 1217 b = EncodeUvarintAscending(b, lastBitsUsed) 1218 return b 1219 } 1220 1221 // EncodeBitArrayDescending is the descending version of EncodeBitArrayAscending. 1222 func EncodeBitArrayDescending(b []byte, d bitarray.BitArray) []byte { 1223 b = append(b, bitArrayDescMarker) 1224 words, lastBitsUsed := d.EncodingParts() 1225 for _, w := range words { 1226 b = EncodeUvarintDescending(b, w) 1227 } 1228 b = append(b, bitArrayDataDescTerminator) 1229 b = EncodeUvarintDescending(b, lastBitsUsed) 1230 return b 1231 } 1232 1233 // DecodeBitArrayAscending decodes a bit array which was encoded using 1234 // EncodeBitArrayAscending. The remainder of the input buffer and the 1235 // decoded bit array are returned. 
1236 func DecodeBitArrayAscending(b []byte) ([]byte, bitarray.BitArray, error) { 1237 if PeekType(b) != BitArray { 1238 return nil, bitarray.BitArray{}, errors.Errorf("did not find marker %x", b) 1239 } 1240 b = b[1:] 1241 1242 // First compute the length. 1243 numWords, _, err := getBitArrayWordsLen(b, bitArrayDataTerminator) 1244 if err != nil { 1245 return b, bitarray.BitArray{}, err 1246 } 1247 // Decode the words. 1248 words := make([]uint64, numWords) 1249 for i := range words { 1250 b, words[i], err = DecodeUvarintAscending(b) 1251 if err != nil { 1252 return b, bitarray.BitArray{}, err 1253 } 1254 } 1255 // Decode the final part. 1256 if len(b) == 0 || b[0] != bitArrayDataTerminator { 1257 return b, bitarray.BitArray{}, errBitArrayTerminatorMissing 1258 } 1259 b = b[1:] 1260 b, lastVal, err := DecodeUvarintAscending(b) 1261 if err != nil { 1262 return b, bitarray.BitArray{}, err 1263 } 1264 ba, err := bitarray.FromEncodingParts(words, lastVal) 1265 return b, ba, err 1266 } 1267 1268 var errBitArrayTerminatorMissing = errors.New("cannot find bit array data terminator") 1269 1270 // getBitArrayWordsLen returns the number of bit array words in the 1271 // encoded bytes and the size in bytes of the encoded word array 1272 // (excluding the terminator byte). 1273 func getBitArrayWordsLen(b []byte, term byte) (int, int, error) { 1274 bSearch := b 1275 numWords := 0 1276 sz := 0 1277 for { 1278 if len(bSearch) == 0 { 1279 return 0, 0, errors.Errorf("slice too short for bit array (%d)", len(b)) 1280 } 1281 if bSearch[0] == term { 1282 break 1283 } 1284 vLen, err := getVarintLen(bSearch) 1285 if err != nil { 1286 return 0, 0, err 1287 } 1288 bSearch = bSearch[vLen:] 1289 numWords++ 1290 sz += vLen 1291 } 1292 return numWords, sz, nil 1293 } 1294 1295 // DecodeBitArrayDescending is the descending version of DecodeBitArrayAscending. 
1296 func DecodeBitArrayDescending(b []byte) ([]byte, bitarray.BitArray, error) { 1297 if PeekType(b) != BitArrayDesc { 1298 return nil, bitarray.BitArray{}, errors.Errorf("did not find marker %x", b) 1299 } 1300 b = b[1:] 1301 1302 // First compute the length. 1303 numWords, _, err := getBitArrayWordsLen(b, bitArrayDataDescTerminator) 1304 if err != nil { 1305 return b, bitarray.BitArray{}, err 1306 } 1307 // Decode the words. 1308 words := make([]uint64, numWords) 1309 for i := range words { 1310 b, words[i], err = DecodeUvarintDescending(b) 1311 if err != nil { 1312 return b, bitarray.BitArray{}, err 1313 } 1314 } 1315 // Decode the final part. 1316 if len(b) == 0 || b[0] != bitArrayDataDescTerminator { 1317 return b, bitarray.BitArray{}, errBitArrayTerminatorMissing 1318 } 1319 b = b[1:] 1320 b, lastVal, err := DecodeUvarintDescending(b) 1321 if err != nil { 1322 return b, bitarray.BitArray{}, err 1323 } 1324 ba, err := bitarray.FromEncodingParts(words, lastVal) 1325 return b, ba, err 1326 } 1327 1328 // Type represents the type of a value encoded by 1329 // Encode{Null,NotNull,Varint,Uvarint,Float,Bytes}. 1330 //go:generate stringer -type=Type 1331 type Type int 1332 1333 // Type values. 1334 // TODO(dan, arjun): Make this into a proto enum. 1335 // The 'Type' annotations are necessary for producing stringer-generated values. 1336 const ( 1337 Unknown Type = 0 1338 Null Type = 1 1339 NotNull Type = 2 1340 Int Type = 3 1341 Float Type = 4 1342 Decimal Type = 5 1343 Bytes Type = 6 1344 BytesDesc Type = 7 // Bytes encoded descendingly 1345 Time Type = 8 1346 Duration Type = 9 1347 True Type = 10 1348 False Type = 11 1349 UUID Type = 12 1350 Array Type = 13 1351 IPAddr Type = 14 1352 // SentinelType is used for bit manipulation to check if the encoded type 1353 // value requires more than 4 bits, and thus will be encoded in two bytes. It 1354 // is not used as a type value, and thus intentionally overlaps with the 1355 // subsequent type value. 
The 'Type' annotation is intentionally omitted here. 1356 SentinelType = 15 1357 JSON Type = 15 1358 Tuple Type = 16 1359 BitArray Type = 17 1360 BitArrayDesc Type = 18 // BitArray encoded descendingly 1361 TimeTZ Type = 19 1362 Geo Type = 20 1363 GeoDesc Type = 21 1364 ArrayKeyAsc Type = 22 // Array key encoding 1365 ArrayKeyDesc Type = 23 // Array key encoded descendingly 1366 ) 1367 1368 // typMap maps an encoded type byte to a decoded Type. It's got 256 slots, one 1369 // for every possible byte value. 1370 var typMap [256]Type 1371 1372 func init() { 1373 buf := []byte{0} 1374 for i := range typMap { 1375 buf[0] = byte(i) 1376 typMap[i] = slowPeekType(buf) 1377 } 1378 } 1379 1380 // PeekType peeks at the type of the value encoded at the start of b. 1381 func PeekType(b []byte) Type { 1382 if len(b) >= 1 { 1383 return typMap[b[0]] 1384 } 1385 return Unknown 1386 } 1387 1388 // slowPeekType is the old implementation of PeekType. It's used to generate 1389 // the lookup table for PeekType. 
1390 func slowPeekType(b []byte) Type { 1391 if len(b) >= 1 { 1392 m := b[0] 1393 switch { 1394 case m == encodedNull, m == encodedNullDesc: 1395 return Null 1396 case m == encodedNotNull, m == encodedNotNullDesc: 1397 return NotNull 1398 case m == arrayKeyMarker: 1399 return ArrayKeyAsc 1400 case m == arrayKeyDescendingMarker: 1401 return ArrayKeyDesc 1402 case m == bytesMarker: 1403 return Bytes 1404 case m == bytesDescMarker: 1405 return BytesDesc 1406 case m == bitArrayMarker: 1407 return BitArray 1408 case m == bitArrayDescMarker: 1409 return BitArrayDesc 1410 case m == timeMarker: 1411 return Time 1412 case m == timeTZMarker: 1413 return TimeTZ 1414 case m == geoMarker: 1415 return Geo 1416 case m == geoDescMarker: 1417 return GeoDesc 1418 case m == byte(Array): 1419 return Array 1420 case m == byte(True): 1421 return True 1422 case m == byte(False): 1423 return False 1424 case m == durationBigNegMarker, m == durationMarker, m == durationBigPosMarker: 1425 return Duration 1426 case m >= IntMin && m <= IntMax: 1427 return Int 1428 case m >= floatNaN && m <= floatNaNDesc: 1429 return Float 1430 case m >= decimalNaN && m <= decimalNaNDesc: 1431 return Decimal 1432 } 1433 } 1434 return Unknown 1435 } 1436 1437 // GetMultiVarintLen find the length of <num> encoded varints that follow a 1438 // 1-byte tag. 1439 func GetMultiVarintLen(b []byte, num int) (int, error) { 1440 p := 1 1441 for i := 0; i < num && p < len(b); i++ { 1442 len, err := getVarintLen(b[p:]) 1443 if err != nil { 1444 return 0, err 1445 } 1446 p += len 1447 } 1448 return p, nil 1449 } 1450 1451 // getMultiNonsortingVarintLen finds the length of <num> encoded nonsorting varints. 
1452 func getMultiNonsortingVarintLen(b []byte, num int) (int, error) { 1453 p := 0 1454 for i := 0; i < num && p < len(b); i++ { 1455 _, len, _, err := DecodeNonsortingStdlibVarint(b[p:]) 1456 if err != nil { 1457 return 0, err 1458 } 1459 p += len 1460 } 1461 return p, nil 1462 } 1463 1464 // getArrayLength returns the length of a key encoded array. The input 1465 // must have had the array type marker stripped from the front. 1466 func getArrayLength(buf []byte, dir Direction) (int, error) { 1467 result := 0 1468 for { 1469 if len(buf) == 0 { 1470 return 0, errors.AssertionFailedf("invalid array encoding (unterminated)") 1471 } 1472 if IsArrayKeyDone(buf, dir) { 1473 // Increment to include the terminator byte. 1474 result++ 1475 break 1476 } 1477 next, err := PeekLength(buf) 1478 if err != nil { 1479 return 0, err 1480 } 1481 // Shift buf over by the encoded data amount. 1482 buf = buf[next:] 1483 result += next 1484 } 1485 return result, nil 1486 } 1487 1488 // PeekLength returns the length of the encoded value at the start of b. Note: 1489 // if this function succeeds, it's not a guarantee that decoding the value will 1490 // succeed. PeekLength is meant to be used on key encoded data only. 1491 func PeekLength(b []byte) (int, error) { 1492 if len(b) == 0 { 1493 return 0, errors.Errorf("empty slice") 1494 } 1495 m := b[0] 1496 switch m { 1497 case encodedNull, encodedNullDesc, encodedNotNull, encodedNotNullDesc, 1498 floatNaN, floatNaNDesc, floatZero, decimalZero, byte(True), byte(False): 1499 // interleavedSentinel also falls into this path. Since it 1500 // contains the same byte value as encodedNotNullDesc, it 1501 // cannot be included explicitly in the case statement. 1502 // ascendingNullWithinArrayKey and descendingNullWithinArrayKey also 1503 // contain the same byte values as encodedNotNull and encodedNotNullDesc 1504 // respectively. 
1505 return 1, nil 1506 case bitArrayMarker, bitArrayDescMarker: 1507 terminator := byte(bitArrayDataTerminator) 1508 if m == bitArrayDescMarker { 1509 terminator = bitArrayDataDescTerminator 1510 } 1511 _, n, err := getBitArrayWordsLen(b[1:], terminator) 1512 if err != nil { 1513 return 1 + n, err 1514 } 1515 m, err := getVarintLen(b[n+2:]) 1516 if err != nil { 1517 return 1 + n + m + 1, err 1518 } 1519 return 1 + n + m + 1, nil 1520 case arrayKeyMarker, arrayKeyDescendingMarker: 1521 dir := Ascending 1522 if m == arrayKeyDescendingMarker { 1523 dir = Descending 1524 } 1525 length, err := getArrayLength(b[1:], dir) 1526 return 1 + length, err 1527 case bytesMarker: 1528 return getBytesLength(b, ascendingBytesEscapes) 1529 case geoMarker: 1530 return getBytesLength(b, ascendingGeoEscapes) 1531 case jsonInvertedIndex: 1532 return getJSONInvertedIndexKeyLength(b) 1533 case bytesDescMarker: 1534 return getBytesLength(b, descendingBytesEscapes) 1535 case geoDescMarker: 1536 return getBytesLength(b, descendingGeoEscapes) 1537 case timeMarker, timeTZMarker: 1538 return GetMultiVarintLen(b, 2) 1539 case durationBigNegMarker, durationMarker, durationBigPosMarker: 1540 return GetMultiVarintLen(b, 3) 1541 case floatNeg, floatPos: 1542 // the marker is followed by 8 bytes 1543 if len(b) < 9 { 1544 return 0, errors.Errorf("slice too short for float (%d)", len(b)) 1545 } 1546 return 9, nil 1547 } 1548 if m >= IntMin && m <= IntMax { 1549 return getVarintLen(b) 1550 } 1551 if m >= decimalNaN && m <= decimalNaNDesc { 1552 return getDecimalLen(b) 1553 } 1554 return 0, errors.Errorf("unknown tag %d", m) 1555 } 1556 1557 // PrettyPrintValue returns the string representation of all contiguous 1558 // decodable values in the provided byte slice, separated by a provided 1559 // separator. 1560 // The directions each value is encoded may be provided. If valDirs is nil, 1561 // all values are decoded and printed with the default direction (ascending). 
1562 func PrettyPrintValue(valDirs []Direction, b []byte, sep string) string { 1563 s1, allDecoded := prettyPrintValueImpl(valDirs, b, sep) 1564 if allDecoded { 1565 return s1 1566 } 1567 if undoPrefixEnd, ok := UndoPrefixEnd(b); ok { 1568 // When we UndoPrefixEnd, we may have lost a tail of 0xFFs. Try to add 1569 // enough of them to get something decoded. This is best-effort, we have to stop 1570 // somewhere. 1571 cap := 20 1572 if len(valDirs) > len(b) { 1573 cap = len(valDirs) - len(b) 1574 } 1575 for i := 0; i < cap; i++ { 1576 if s2, allDecoded := prettyPrintValueImpl(valDirs, undoPrefixEnd, sep); allDecoded { 1577 return s2 + sep + "PrefixEnd" 1578 } 1579 undoPrefixEnd = append(undoPrefixEnd, 0xFF) 1580 } 1581 } 1582 return s1 1583 } 1584 1585 func prettyPrintValueImpl(valDirs []Direction, b []byte, sep string) (string, bool) { 1586 allDecoded := true 1587 var buf strings.Builder 1588 for len(b) > 0 { 1589 // If there are more values than encoding directions specified, 1590 // valDir will contain the 0 value of Direction. 1591 // prettyPrintFirstValue will then use the default encoding 1592 // direction per each value type. 1593 var valDir Direction 1594 if len(valDirs) > 0 { 1595 valDir = valDirs[0] 1596 valDirs = valDirs[1:] 1597 } 1598 1599 bb, s, err := prettyPrintFirstValue(valDir, b) 1600 if err != nil { 1601 allDecoded = false 1602 buf.WriteString(sep) 1603 buf.WriteByte('?') 1604 buf.WriteByte('?') 1605 buf.WriteByte('?') 1606 } else { 1607 buf.WriteString(sep) 1608 buf.WriteString(s) 1609 } 1610 b = bb 1611 } 1612 return buf.String(), allDecoded 1613 } 1614 1615 // prettyPrintFirstValue returns a string representation of the first decodable 1616 // value in the provided byte slice, along with the remaining byte slice 1617 // after decoding. 1618 // 1619 // Ascending will be the default direction (when dir is the 0 value) for all 1620 // values except for NotNull. 
1621 // 1622 // NotNull: if Ascending or Descending directions are explicitly provided (i.e. 1623 // for table keys), then !NULL will be used. Otherwise, # will be used. 1624 // 1625 // We prove that the default # will only be used for interleaved sentinels: 1626 // - For non-table keys, we never have NotNull. 1627 // - For table keys, we always explicitly pass in Ascending and Descending for 1628 // all key values, including NotNulls. The only case we do not pass in 1629 // direction is during a SHOW RANGES ON TABLE parent and there exists 1630 // an interleaved split key. Note that interleaved keys cannot have NotNull 1631 // values except for the interleaved sentinel. 1632 // 1633 // Defaulting to Ascending for all other value types is fine since all 1634 // non-table keys encode values with Ascending. 1635 // 1636 // The only case where we end up defaulting direction for table keys is for 1637 // interleaved split keys in SHOW RANGES ON TABLE parent. Since 1638 // interleaved prefixes are defined on the primary key (and primary key values 1639 // are always encoded Ascending), this will always print out the correct key 1640 // even if we don't have directions for the child index's columns. 
func prettyPrintFirstValue(dir Direction, b []byte) ([]byte, string, error) {
	var err error
	// Dispatch on the type reported by PeekType for the leading bytes of b.
	switch typ := PeekType(b); typ {
	case Null:
		b, _ = DecodeIfNull(b)
		return b, "NULL", nil
	// True, False and Array are rendered from the marker alone; exactly one
	// byte is consumed.
	case True:
		return b[1:], "True", nil
	case False:
		return b[1:], "False", nil
	case Array:
		return b[1:], "Arr", nil
	case ArrayKeyAsc, ArrayKeyDesc:
		// The direction of the array encoding is taken from the marker itself
		// rather than from dir.
		encDir := Ascending
		if typ == ArrayKeyDesc {
			encDir = Descending
		}
		var build strings.Builder
		buf, err := ValidateAndConsumeArrayKeyMarker(b, encDir)
		if err != nil {
			return nil, "", err
		}
		build.WriteString("ARRAY[")
		first := true
		// Use the array key decoding logic, but instead of calling out
		// to DecodeTableKey, just make a recursive call.
		for {
			// Running out of bytes before seeing the terminator means the
			// array encoding is truncated.
			if len(buf) == 0 {
				return nil, "", errors.AssertionFailedf("invalid array (unterminated)")
			}
			if IsArrayKeyDone(buf, encDir) {
				// Consume the terminator byte and stop.
				buf = buf[1:]
				break
			}
			var next string
			// NOTE(review): the NULL check and the recursive call below use
			// dir, not encDir — confirm this is intended.
			if IsNextByteArrayEncodedNull(buf, dir) {
				next = "NULL"
				buf = buf[1:]
			} else {
				buf, next, err = prettyPrintFirstValue(dir, buf)
				if err != nil {
					return nil, "", err
				}
			}
			// Elements are comma separated; no separator before the first one.
			if !first {
				build.WriteString(",")
			}
			build.WriteString(next)
			first = false
		}
		build.WriteString("]")
		return buf, build.String(), nil
	case NotNull:
		// The tag can be either encodedNotNull or encodedNotNullDesc. The
		// latter can be an interleaved sentinel.
		isNotNullDesc := (b[0] == encodedNotNullDesc)
		b, _ = DecodeIfNotNull(b)
		if dir != Ascending && dir != Descending && isNotNullDesc {
			// Unspecified direction (0 value) will default to '#' for the
			// interleaved sentinel.
			return b, "#", nil
		}
		return b, "!NULL", nil
	// For the types below, any dir other than Descending selects the
	// ascending decoder.
	case Int:
		var i int64
		if dir == Descending {
			b, i, err = DecodeVarintDescending(b)
		} else {
			b, i, err = DecodeVarintAscending(b)
		}
		if err != nil {
			return b, "", err
		}
		return b, strconv.FormatInt(i, 10), nil
	case Float:
		var f float64
		if dir == Descending {
			b, f, err = DecodeFloatDescending(b)
		} else {
			b, f, err = DecodeFloatAscending(b)
		}
		if err != nil {
			return b, "", err
		}
		return b, strconv.FormatFloat(f, 'g', -1, 64), nil
	case Decimal:
		var d apd.Decimal
		if dir == Descending {
			b, d, err = DecodeDecimalDescending(b, nil)
		} else {
			b, d, err = DecodeDecimalAscending(b, nil)
		}
		if err != nil {
			return b, "", err
		}
		return b, d.String(), nil
	// Bit arrays and bytes have distinct ascending and descending types, so a
	// requested direction that contradicts the encoding is reported as an
	// error instead of being decoded.
	case BitArray:
		if dir == Descending {
			return b, "", errors.Errorf("descending bit column dir but ascending bit array encoding")
		}
		var d bitarray.BitArray
		b, d, err = DecodeBitArrayAscending(b)
		// The rendered string is returned together with any decoding error.
		return b, "B" + d.String(), err
	case BitArrayDesc:
		if dir == Ascending {
			return b, "", errors.Errorf("ascending bit column dir but descending bit array encoding")
		}
		var d bitarray.BitArray
		b, d, err = DecodeBitArrayDescending(b)
		return b, "B" + d.String(), err
	case Bytes:
		if dir == Descending {
			return b, "", errors.Errorf("descending bytes column dir but ascending bytes encoding")
		}
		var s string
		b, s, err = DecodeUnsafeStringAscending(b, nil)
		if err != nil {
			return b, "", err
		}
		return b, strconv.Quote(s), nil
	case BytesDesc:
		if dir == Ascending {
			return b, "", errors.Errorf("ascending bytes column dir but descending bytes encoding")
		}

		var s string
		b, s, err = DecodeUnsafeStringDescending(b, nil)
		if err != nil {
			return b, "", err
		}
		return b, strconv.Quote(s), nil
	case Time:
		var t time.Time
		if dir == Descending {
			b, t, err = DecodeTimeDescending(b)
		} else {
			b, t, err = DecodeTimeAscending(b)
		}
		if err != nil {
			return b, "", err
		}
		// Times are printed in UTC using the RFC 3339 layout with nanoseconds.
		return b, t.UTC().Format(time.RFC3339Nano), nil
	case TimeTZ:
		var t timetz.TimeTZ
		if dir == Descending {
			b, t, err = DecodeTimeTZDescending(b)
		} else {
			b, t, err = DecodeTimeTZAscending(b)
		}
		if err != nil {
			return b, "", err
		}
		return b, t.String(), nil
	case Duration:
		var d duration.Duration
		if dir == Descending {
			b, d, err = DecodeDurationDescending(b)
		} else {
			b, d, err = DecodeDurationAscending(b)
		}
		if err != nil {
			return b, "", err
		}
		return b, d.StringNanos(), nil
	default:
		// None of the types above matched; check the JSON markers before
		// giving up.
		if len(b) >= 1 {
			switch b[0] {
			case jsonInvertedIndex:
				var str string
				str, b, err = prettyPrintInvertedIndexKey(b)
				if err != nil {
					return b, "", err
				}
				// No text was produced for the inverted index key; render
				// whatever value follows in the remaining bytes instead.
				if str == "" {
					return prettyPrintFirstValue(dir, b)
				}
				return b, str, nil
			case jsonEmptyArray:
				return b[1:], "[]", nil
			case jsonEmptyObject:
				return b[1:], "{}", nil
			}
		}
		// This shouldn't ever happen, but if it does, return an empty slice.
		return nil, strconv.Quote(string(b)), nil
	}
}

// UndoPrefixEnd is a partial inverse for roachpb.Key.PrefixEnd.
//
// In general, we can't undo PrefixEnd because it is lossy; we don't know how
// many FFs were stripped from the original key. For example:
//   - key:            01 02 03 FF FF
//   - PrefixEnd:      01 02 04
//   - UndoPrefixEnd:  01 02 03
//
// Some keys are not possible results of PrefixEnd; in particular, PrefixEnd
// keys never end in 00. If an impossible key is passed, the second return value
// is false.
//
// Specifically, calling UndoPrefixEnd will reverse the effects of calling a
// PrefixEnd on a byte sequence, except when the byte sequence represents a
// maximal prefix (i.e., 0xff...). This is because PrefixEnd is a lossy
// operation: PrefixEnd(0xff) returns 0xff rather than wrapping around to the
// minimal prefix 0x00. For consistency, UndoPrefixEnd is also lossy:
// UndoPrefixEnd(0x00) returns 0x00 rather than wrapping around to the maximal
// prefix 0xff.
//
// Formally:
//
//	PrefixEnd(UndoPrefixEnd(p)) = p for all non-minimal prefixes p
//	UndoPrefixEnd(PrefixEnd(p)) = p for all non-maximal prefixes p
//
// A minimal prefix is any prefix that consists only of one or more 0x00 bytes;
// analogously, a maximal prefix is any prefix that consists only of one or more
// 0xff bytes.
//
// UndoPrefixEnd is implemented here to avoid a circular dependency on roachpb,
// but arguably belongs in a byte-manipulation utility package.
func UndoPrefixEnd(b []byte) (_ []byte, ok bool) {
	last := len(b) - 1
	if last < 0 || b[last] == 0 {
		// An empty key or one ending in 0x00 cannot be produced by PrefixEnd.
		return nil, false
	}
	// Work on a copy so that the caller's slice is left untouched.
	prev := append([]byte(nil), b...)
	prev[last]--
	return prev, true
}

// NonsortingVarintMaxLen is the maximum number of bytes produced by
// EncodeNonsortingStdlibVarint.
const NonsortingVarintMaxLen = binary.MaxVarintLen64

// EncodeNonsortingStdlibVarint appends the encoding/binary varint encoding of
// x to appendTo and returns the extended buffer.
func EncodeNonsortingStdlibVarint(appendTo []byte, x int64) []byte {
	// Fixed size array to allocate this on the stack.
	var tmp [binary.MaxVarintLen64]byte
	n := binary.PutVarint(tmp[:], x)
	return append(appendTo, tmp[:n]...)
}

// DecodeNonsortingStdlibVarint decodes a value written by
// EncodeNonsortingStdlibVarint. It returns the bytes following the varint, the
// number of bytes the varint occupied, and the decoded value. An error is
// returned when binary.Varint cannot decode a value from b.
func DecodeNonsortingStdlibVarint(b []byte) (remaining []byte, length int, value int64, err error) {
	value, length = binary.Varint(b)
	if length > 0 {
		return b[length:], length, value, nil
	}
	return nil, 0, 0, fmt.Errorf("int64 varint decoding failed: %d", length)
}

// NonsortingUvarintMaxLen is the maximum length of an EncodeNonsortingUvarint
// encoded value.
const NonsortingUvarintMaxLen = 10

// EncodeNonsortingUvarint encodes a uint64, appends it to the supplied buffer,
// and returns the final buffer. The encoding used is similar to
// encoding/binary, but with the most significant bits first:
//   - Unsigned integers are serialized 7 bits at a time, starting with the
//     most significant bits.
//   - The most significant bit (msb) in each output byte indicates if there
//     is a continuation byte (msb = 1).
func EncodeNonsortingUvarint(appendTo []byte, x uint64) []byte {
	// Each case handles one output length, from one byte up to ten; the
	// branches are unrolled so every value is written with a single append.
	switch {
	case x>>7 == 0:
		return append(appendTo, byte(x))
	case x>>14 == 0:
		return append(appendTo, 0x80|byte(x>>7), 0x7f&byte(x))
	case x>>21 == 0:
		return append(appendTo,
			0x80|byte(x>>14), 0x80|byte(x>>7), 0x7f&byte(x))
	case x>>28 == 0:
		return append(appendTo,
			0x80|byte(x>>21), 0x80|byte(x>>14), 0x80|byte(x>>7), 0x7f&byte(x))
	case x>>35 == 0:
		return append(appendTo,
			0x80|byte(x>>28), 0x80|byte(x>>21), 0x80|byte(x>>14), 0x80|byte(x>>7),
			0x7f&byte(x))
	case x>>42 == 0:
		return append(appendTo,
			0x80|byte(x>>35), 0x80|byte(x>>28), 0x80|byte(x>>21), 0x80|byte(x>>14),
			0x80|byte(x>>7), 0x7f&byte(x))
	case x>>49 == 0:
		return append(appendTo,
			0x80|byte(x>>42), 0x80|byte(x>>35), 0x80|byte(x>>28), 0x80|byte(x>>21),
			0x80|byte(x>>14), 0x80|byte(x>>7), 0x7f&byte(x))
	case x>>56 == 0:
		return append(appendTo,
			0x80|byte(x>>49), 0x80|byte(x>>42), 0x80|byte(x>>35), 0x80|byte(x>>28),
			0x80|byte(x>>21), 0x80|byte(x>>14), 0x80|byte(x>>7), 0x7f&byte(x))
	case x>>63 == 0:
		return append(appendTo,
			0x80|byte(x>>56), 0x80|byte(x>>49), 0x80|byte(x>>42), 0x80|byte(x>>35),
			0x80|byte(x>>28), 0x80|byte(x>>21), 0x80|byte(x>>14), 0x80|byte(x>>7),
			0x7f&byte(x))
	default:
		return append(appendTo,
			0x80|byte(x>>63), 0x80|byte(x>>56), 0x80|byte(x>>49), 0x80|byte(x>>42),
			0x80|byte(x>>35), 0x80|byte(x>>28), 0x80|byte(x>>21), 0x80|byte(x>>14),
			0x80|byte(x>>7), 0x7f&byte(x))
	}
}

// DecodeNonsortingUvarint decodes a value written by EncodeNonsortingUvarint.
// It returns the bytes following the value, the number of bytes consumed, and
// the decoded value. When buf holds no terminating byte (including when it is
// empty), buf is returned unchanged with a zero length and value and a nil
// error.
func DecodeNonsortingUvarint(buf []byte) (remaining []byte, length int, value uint64, err error) {
	// TODO(dan): Handle overflow.
	for i := 0; i < len(buf); i++ {
		c := buf[i]
		// Make room for the next 7-bit group and merge it in.
		value = value<<7 | uint64(c&0x7f)
		if c&0x80 == 0 {
			// A clear msb marks the final byte of the value.
			return buf[i+1:], i + 1, value, nil
		}
	}
	return buf, 0, 0, nil
}

// DecodeNonsortingStdlibUvarint decodes a value encoded with binary.PutUvarint.
// It returns the bytes following the value, the number of bytes consumed, and
// the decoded value.
func DecodeNonsortingStdlibUvarint(
	buf []byte,
) (remaining []byte, length int, value uint64, err error) {
	value, length = binary.Uvarint(buf)
	if length <= 0 {
		return buf, 0, 0, errors.New("buffer too small")
	}
	return buf[length:], length, value, nil
}

// PeekLengthNonsortingUvarint returns the number of bytes occupied by the
// EncodeNonsortingUvarint value at the start of buf, or 0 when buf contains no
// terminating byte.
func PeekLengthNonsortingUvarint(buf []byte) int {
	for i := 0; i < len(buf); i++ {
		if buf[i] < 0x80 {
			return i + 1
		}
	}
	return 0
}

// NoColumnID is a sentinel for the EncodeFooValue methods representing an
// invalid column id.
const NoColumnID uint32 = 0

// EncodeValueTag encodes the prefix that is used by each of the EncodeFooValue
// methods.
//
// The prefix uses varints to encode a column id and type, packing them into a
// single byte when they're small (colID < 8 and typ < 15). This works by
// shifting the colID "left" by 4 and putting any type less than 15 in the low
// bits. The result is uvarint encoded and fits in one byte if the original
// column id fit in 3 bits. If it doesn't fit in one byte, the most significant
// bits spill to the "left", leaving the type bits always at the very "right".
//
// If the type is >= 15, the reserved sentinel of 15 is placed in the type bits
// and a uvarint follows with the type value. This means that there are always
// one or two uvarints.
1983 // 1984 // Together, this means the everything but the last byte of the first uvarint 1985 // can be dropped if the column id isn't needed. 1986 func EncodeValueTag(appendTo []byte, colID uint32, typ Type) []byte { 1987 if typ >= SentinelType { 1988 appendTo = EncodeNonsortingUvarint(appendTo, uint64(colID)<<4|uint64(SentinelType)) 1989 return EncodeNonsortingUvarint(appendTo, uint64(typ)) 1990 } 1991 if colID == NoColumnID { 1992 // TODO(dan): EncodeValueTag is not inlined by the compiler. Copying this 1993 // special case into one of the EncodeFooValue functions speeds it up by 1994 // ~4ns. 1995 return append(appendTo, byte(typ)) 1996 } 1997 return EncodeNonsortingUvarint(appendTo, uint64(colID)<<4|uint64(typ)) 1998 } 1999 2000 // EncodeNullValue encodes a null value, appends it to the supplied buffer, and 2001 // returns the final buffer. 2002 func EncodeNullValue(appendTo []byte, colID uint32) []byte { 2003 return EncodeValueTag(appendTo, colID, Null) 2004 } 2005 2006 // EncodeNotNullValue encodes a not null value, appends it to the supplied 2007 // buffer, and returns the final buffer. 2008 func EncodeNotNullValue(appendTo []byte, colID uint32) []byte { 2009 return EncodeValueTag(appendTo, colID, NotNull) 2010 } 2011 2012 // EncodeBoolValue encodes a bool value, appends it to the supplied buffer, and 2013 // returns the final buffer. 2014 func EncodeBoolValue(appendTo []byte, colID uint32, b bool) []byte { 2015 if b { 2016 return EncodeValueTag(appendTo, colID, True) 2017 } 2018 return EncodeValueTag(appendTo, colID, False) 2019 } 2020 2021 // EncodeIntValue encodes an int value with its value tag, appends it to the 2022 // supplied buffer, and returns the final buffer. 
2023 func EncodeIntValue(appendTo []byte, colID uint32, i int64) []byte { 2024 appendTo = EncodeValueTag(appendTo, colID, Int) 2025 return EncodeUntaggedIntValue(appendTo, i) 2026 } 2027 2028 // EncodeUntaggedIntValue encodes an int value, appends it to the supplied buffer, and 2029 // returns the final buffer. 2030 func EncodeUntaggedIntValue(appendTo []byte, i int64) []byte { 2031 return EncodeNonsortingStdlibVarint(appendTo, i) 2032 } 2033 2034 const floatValueEncodedLength = uint64AscendingEncodedLength 2035 2036 // EncodeFloatValue encodes a float value with its value tag, appends it to the 2037 // supplied buffer, and returns the final buffer. 2038 func EncodeFloatValue(appendTo []byte, colID uint32, f float64) []byte { 2039 appendTo = EncodeValueTag(appendTo, colID, Float) 2040 return EncodeUntaggedFloatValue(appendTo, f) 2041 } 2042 2043 // EncodeUntaggedFloatValue encodes a float value, appends it to the supplied buffer, 2044 // and returns the final buffer. 2045 func EncodeUntaggedFloatValue(appendTo []byte, f float64) []byte { 2046 return EncodeUint64Ascending(appendTo, math.Float64bits(f)) 2047 } 2048 2049 // EncodeBytesValue encodes a byte array value with its value tag, appends it to 2050 // the supplied buffer, and returns the final buffer. 2051 func EncodeBytesValue(appendTo []byte, colID uint32, data []byte) []byte { 2052 appendTo = EncodeValueTag(appendTo, colID, Bytes) 2053 return EncodeUntaggedBytesValue(appendTo, data) 2054 } 2055 2056 // EncodeUntaggedBytesValue encodes a byte array value, appends it to the supplied 2057 // buffer, and returns the final buffer. 2058 func EncodeUntaggedBytesValue(appendTo []byte, data []byte) []byte { 2059 appendTo = EncodeNonsortingUvarint(appendTo, uint64(len(data))) 2060 return append(appendTo, data...) 2061 } 2062 2063 // EncodeArrayValue encodes a byte array value with its value tag, appends it to 2064 // the supplied buffer, and returns the final buffer. 
2065 func EncodeArrayValue(appendTo []byte, colID uint32, data []byte) []byte { 2066 appendTo = EncodeValueTag(appendTo, colID, Array) 2067 return EncodeUntaggedBytesValue(appendTo, data) 2068 } 2069 2070 // EncodeTimeValue encodes a time.Time value with its value tag, appends it to 2071 // the supplied buffer, and returns the final buffer. 2072 func EncodeTimeValue(appendTo []byte, colID uint32, t time.Time) []byte { 2073 appendTo = EncodeValueTag(appendTo, colID, Time) 2074 return EncodeUntaggedTimeValue(appendTo, t) 2075 } 2076 2077 // EncodeUntaggedTimeValue encodes a time.Time value, appends it to the supplied buffer, 2078 // and returns the final buffer. 2079 func EncodeUntaggedTimeValue(appendTo []byte, t time.Time) []byte { 2080 appendTo = EncodeNonsortingStdlibVarint(appendTo, t.Unix()) 2081 return EncodeNonsortingStdlibVarint(appendTo, int64(t.Nanosecond())) 2082 } 2083 2084 // EncodeTimeTZValue encodes a timetz.TimeTZ value with its value tag, appends it to 2085 // the supplied buffer, and returns the final buffer. 2086 func EncodeTimeTZValue(appendTo []byte, colID uint32, t timetz.TimeTZ) []byte { 2087 appendTo = EncodeValueTag(appendTo, colID, TimeTZ) 2088 return EncodeUntaggedTimeTZValue(appendTo, t) 2089 } 2090 2091 // EncodeUntaggedTimeTZValue encodes a time.Time value, appends it to the supplied buffer, 2092 // and returns the final buffer. 2093 func EncodeUntaggedTimeTZValue(appendTo []byte, t timetz.TimeTZ) []byte { 2094 appendTo = EncodeNonsortingStdlibVarint(appendTo, int64(t.TimeOfDay)) 2095 return EncodeNonsortingStdlibVarint(appendTo, int64(t.OffsetSecs)) 2096 } 2097 2098 // EncodeGeoValue encodes a geopb.SpatialObject value with its value tag, appends it to 2099 // the supplied buffer, and returns the final buffer. 
2100 func EncodeGeoValue(appendTo []byte, colID uint32, so geopb.SpatialObject) ([]byte, error) { 2101 appendTo = EncodeValueTag(appendTo, colID, Geo) 2102 return EncodeUntaggedGeoValue(appendTo, so) 2103 } 2104 2105 // EncodeUntaggedGeoValue encodes a geopb.SpatialObject value, appends it to the supplied buffer, 2106 // and returns the final buffer. 2107 func EncodeUntaggedGeoValue(appendTo []byte, so geopb.SpatialObject) ([]byte, error) { 2108 bytes, err := protoutil.Marshal(&so) 2109 if err != nil { 2110 return nil, err 2111 } 2112 return EncodeUntaggedBytesValue(appendTo, bytes), nil 2113 } 2114 2115 // EncodeDecimalValue encodes an apd.Decimal value with its value tag, appends 2116 // it to the supplied buffer, and returns the final buffer. 2117 func EncodeDecimalValue(appendTo []byte, colID uint32, d *apd.Decimal) []byte { 2118 appendTo = EncodeValueTag(appendTo, colID, Decimal) 2119 return EncodeUntaggedDecimalValue(appendTo, d) 2120 } 2121 2122 // EncodeUntaggedDecimalValue encodes an apd.Decimal value, appends it to the supplied 2123 // buffer, and returns the final buffer. 2124 func EncodeUntaggedDecimalValue(appendTo []byte, d *apd.Decimal) []byte { 2125 // To avoid the allocation, leave space for the varint, encode the decimal, 2126 // encode the varint, and shift the encoded decimal to the end of the 2127 // varint. 2128 varintPos := len(appendTo) 2129 // Manually append 10 (binary.MaxVarintLen64) 0s to avoid the allocation. 
2130 appendTo = append(appendTo, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 2131 decOffset := len(appendTo) 2132 appendTo = EncodeNonsortingDecimal(appendTo, d) 2133 decLen := len(appendTo) - decOffset 2134 varintLen := binary.PutUvarint(appendTo[varintPos:decOffset], uint64(decLen)) 2135 copy(appendTo[varintPos+varintLen:varintPos+varintLen+decLen], appendTo[decOffset:decOffset+decLen]) 2136 return appendTo[:varintPos+varintLen+decLen] 2137 } 2138 2139 // EncodeDurationValue encodes a duration.Duration value with its value tag, 2140 // appends it to the supplied buffer, and returns the final buffer. 2141 func EncodeDurationValue(appendTo []byte, colID uint32, d duration.Duration) []byte { 2142 appendTo = EncodeValueTag(appendTo, colID, Duration) 2143 return EncodeUntaggedDurationValue(appendTo, d) 2144 } 2145 2146 // EncodeUntaggedDurationValue encodes a duration.Duration value, appends it to the 2147 // supplied buffer, and returns the final buffer. 2148 func EncodeUntaggedDurationValue(appendTo []byte, d duration.Duration) []byte { 2149 appendTo = EncodeNonsortingStdlibVarint(appendTo, d.Months) 2150 appendTo = EncodeNonsortingStdlibVarint(appendTo, d.Days) 2151 return EncodeNonsortingStdlibVarint(appendTo, d.Nanos()) 2152 } 2153 2154 // EncodeBitArrayValue encodes a bit array value with its value tag, 2155 // appends it to the supplied buffer, and returns the final buffer. 2156 func EncodeBitArrayValue(appendTo []byte, colID uint32, d bitarray.BitArray) []byte { 2157 appendTo = EncodeValueTag(appendTo, colID, BitArray) 2158 return EncodeUntaggedBitArrayValue(appendTo, d) 2159 } 2160 2161 // EncodeUntaggedBitArrayValue encodes a bit array value, appends it to the 2162 // supplied buffer, and returns the final buffer. 
2163 func EncodeUntaggedBitArrayValue(appendTo []byte, d bitarray.BitArray) []byte { 2164 bitLen := d.BitLen() 2165 words, _ := d.EncodingParts() 2166 2167 appendTo = EncodeNonsortingUvarint(appendTo, uint64(bitLen)) 2168 for _, w := range words { 2169 appendTo = EncodeUint64Ascending(appendTo, w) 2170 } 2171 return appendTo 2172 } 2173 2174 // EncodeUUIDValue encodes a uuid.UUID value with its value tag, appends it to 2175 // the supplied buffer, and returns the final buffer. 2176 func EncodeUUIDValue(appendTo []byte, colID uint32, u uuid.UUID) []byte { 2177 appendTo = EncodeValueTag(appendTo, colID, UUID) 2178 return EncodeUntaggedUUIDValue(appendTo, u) 2179 } 2180 2181 // EncodeUntaggedUUIDValue encodes a uuid.UUID value, appends it to the supplied buffer, 2182 // and returns the final buffer. 2183 func EncodeUntaggedUUIDValue(appendTo []byte, u uuid.UUID) []byte { 2184 return append(appendTo, u.GetBytes()...) 2185 } 2186 2187 // EncodeIPAddrValue encodes a ipaddr.IPAddr value with its value tag, appends 2188 // it to the supplied buffer, and returns the final buffer. 2189 func EncodeIPAddrValue(appendTo []byte, colID uint32, u ipaddr.IPAddr) []byte { 2190 appendTo = EncodeValueTag(appendTo, colID, IPAddr) 2191 return EncodeUntaggedIPAddrValue(appendTo, u) 2192 } 2193 2194 // EncodeUntaggedIPAddrValue encodes a ipaddr.IPAddr value, appends it to the 2195 // supplied buffer, and returns the final buffer. 2196 func EncodeUntaggedIPAddrValue(appendTo []byte, u ipaddr.IPAddr) []byte { 2197 return u.ToBuffer(appendTo) 2198 } 2199 2200 // EncodeJSONValue encodes an already-byte-encoded JSON value with no value tag 2201 // but with a length prefix, appends it to the supplied buffer, and returns the 2202 // final buffer. 
2203 func EncodeJSONValue(appendTo []byte, colID uint32, data []byte) []byte { 2204 appendTo = EncodeValueTag(appendTo, colID, JSON) 2205 return EncodeUntaggedBytesValue(appendTo, data) 2206 } 2207 2208 // DecodeValueTag decodes a value encoded by EncodeValueTag, used as a prefix in 2209 // each of the other EncodeFooValue methods. 2210 // 2211 // The tag is structured such that the encoded column id can be dropped from the 2212 // front by removing the first `typeOffset` bytes. DecodeValueTag, 2213 // PeekValueLength and each of the DecodeFooValue methods will still work as 2214 // expected with `b[typeOffset:]`. (Except, obviously, the column id is no 2215 // longer encoded so if this suffix is passed back to DecodeValueTag, the 2216 // returned colID should be discarded.) 2217 // 2218 // Concretely: 2219 // b := ... 2220 // typeOffset, _, colID, typ, err := DecodeValueTag(b) 2221 // _, _, _, typ, err := DecodeValueTag(b[typeOffset:]) 2222 // will return the same typ and err and 2223 // DecodeFooValue(b) 2224 // DecodeFooValue(b[typeOffset:]) 2225 // will return the same thing. PeekValueLength works as expected with either of 2226 // `b` or `b[typeOffset:]`. 2227 func DecodeValueTag(b []byte) (typeOffset int, dataOffset int, colID uint32, typ Type, err error) { 2228 // TODO(dan): This can be made faster by special casing the single byte 2229 // version and skipping the column id extraction when it's not needed. 
2230 if len(b) == 0 { 2231 return 0, 0, 0, Unknown, fmt.Errorf("empty array") 2232 } 2233 var n int 2234 var tag uint64 2235 b, n, tag, err = DecodeNonsortingUvarint(b) 2236 if err != nil { 2237 return 0, 0, 0, Unknown, err 2238 } 2239 colID = uint32(tag >> 4) 2240 2241 typ = Type(tag & 0xf) 2242 typeOffset = n - 1 2243 dataOffset = n 2244 if typ == SentinelType { 2245 _, n, tag, err = DecodeNonsortingUvarint(b) 2246 if err != nil { 2247 return 0, 0, 0, Unknown, err 2248 } 2249 typ = Type(tag) 2250 dataOffset += n 2251 } 2252 return typeOffset, dataOffset, colID, typ, nil 2253 } 2254 2255 // DecodeBoolValue decodes a value encoded by EncodeBoolValue. 2256 func DecodeBoolValue(buf []byte) (remaining []byte, b bool, err error) { 2257 _, dataOffset, _, typ, err := DecodeValueTag(buf) 2258 if err != nil { 2259 return buf, false, err 2260 } 2261 buf = buf[dataOffset:] 2262 switch typ { 2263 case True: 2264 return buf, true, nil 2265 case False: 2266 return buf, false, nil 2267 default: 2268 return buf, false, fmt.Errorf("value type is not %s or %s: %s", True, False, typ) 2269 } 2270 } 2271 2272 // DecodeIntValue decodes a value encoded by EncodeIntValue. 2273 func DecodeIntValue(b []byte) (remaining []byte, i int64, err error) { 2274 b, err = decodeValueTypeAssert(b, Int) 2275 if err != nil { 2276 return b, 0, err 2277 } 2278 return DecodeUntaggedIntValue(b) 2279 } 2280 2281 // DecodeUntaggedIntValue decodes a value encoded by EncodeUntaggedIntValue. 2282 func DecodeUntaggedIntValue(b []byte) (remaining []byte, i int64, err error) { 2283 b, _, i, err = DecodeNonsortingStdlibVarint(b) 2284 return b, i, err 2285 } 2286 2287 // DecodeFloatValue decodes a value encoded by EncodeFloatValue. 
2288 func DecodeFloatValue(b []byte) (remaining []byte, f float64, err error) { 2289 b, err = decodeValueTypeAssert(b, Float) 2290 if err != nil { 2291 return b, 0, err 2292 } 2293 return DecodeUntaggedFloatValue(b) 2294 } 2295 2296 // DecodeUntaggedFloatValue decodes a value encoded by EncodeUntaggedFloatValue. 2297 func DecodeUntaggedFloatValue(b []byte) (remaining []byte, f float64, err error) { 2298 if len(b) < 8 { 2299 return b, 0, fmt.Errorf("float64 value should be exactly 8 bytes: %d", len(b)) 2300 } 2301 var i uint64 2302 b, i, err = DecodeUint64Ascending(b) 2303 return b, math.Float64frombits(i), err 2304 } 2305 2306 // DecodeBytesValue decodes a value encoded by EncodeBytesValue. 2307 func DecodeBytesValue(b []byte) (remaining []byte, data []byte, err error) { 2308 b, err = decodeValueTypeAssert(b, Bytes) 2309 if err != nil { 2310 return b, nil, err 2311 } 2312 return DecodeUntaggedBytesValue(b) 2313 } 2314 2315 // DecodeUntaggedBytesValue decodes a value encoded by EncodeUntaggedBytesValue. 2316 func DecodeUntaggedBytesValue(b []byte) (remaining, data []byte, err error) { 2317 var i uint64 2318 b, _, i, err = DecodeNonsortingUvarint(b) 2319 if err != nil { 2320 return b, nil, err 2321 } 2322 return b[int(i):], b[:int(i)], nil 2323 } 2324 2325 // DecodeTimeValue decodes a value encoded by EncodeTimeValue. 2326 func DecodeTimeValue(b []byte) (remaining []byte, t time.Time, err error) { 2327 b, err = decodeValueTypeAssert(b, Time) 2328 if err != nil { 2329 return b, time.Time{}, err 2330 } 2331 return DecodeUntaggedTimeValue(b) 2332 } 2333 2334 // DecodeUntaggedTimeValue decodes a value encoded by EncodeUntaggedTimeValue. 
2335 func DecodeUntaggedTimeValue(b []byte) (remaining []byte, t time.Time, err error) { 2336 var sec, nsec int64 2337 b, _, sec, err = DecodeNonsortingStdlibVarint(b) 2338 if err != nil { 2339 return b, time.Time{}, err 2340 } 2341 b, _, nsec, err = DecodeNonsortingStdlibVarint(b) 2342 if err != nil { 2343 return b, time.Time{}, err 2344 } 2345 return b, timeutil.Unix(sec, nsec), nil 2346 } 2347 2348 // DecodeTimeTZValue decodes a value encoded by EncodeTimeTZValue. 2349 func DecodeTimeTZValue(b []byte) (remaining []byte, t timetz.TimeTZ, err error) { 2350 b, err = decodeValueTypeAssert(b, TimeTZ) 2351 if err != nil { 2352 return b, timetz.TimeTZ{}, err 2353 } 2354 return DecodeUntaggedTimeTZValue(b) 2355 } 2356 2357 // DecodeUntaggedTimeTZValue decodes a value encoded by EncodeUntaggedTimeTZValue. 2358 func DecodeUntaggedTimeTZValue(b []byte) (remaining []byte, t timetz.TimeTZ, err error) { 2359 var timeOfDayMicros int64 2360 b, _, timeOfDayMicros, err = DecodeNonsortingStdlibVarint(b) 2361 if err != nil { 2362 return b, timetz.TimeTZ{}, err 2363 } 2364 var offsetSecs int64 2365 b, _, offsetSecs, err = DecodeNonsortingStdlibVarint(b) 2366 if err != nil { 2367 return b, timetz.TimeTZ{}, err 2368 } 2369 // Do not use timeofday.FromInt as it truncates 24:00 into 00:00. 2370 return b, timetz.MakeTimeTZ(timeofday.TimeOfDay(timeOfDayMicros), int32(offsetSecs)), nil 2371 } 2372 2373 // DecodeDecimalValue decodes a value encoded by EncodeDecimalValue. 2374 func DecodeDecimalValue(b []byte) (remaining []byte, d apd.Decimal, err error) { 2375 b, err = decodeValueTypeAssert(b, Decimal) 2376 if err != nil { 2377 return b, apd.Decimal{}, err 2378 } 2379 return DecodeUntaggedDecimalValue(b) 2380 } 2381 2382 // DecodeUntaggedGeoValue decodes a value encoded by EncodeUntaggedGeoValue. 
2383 func DecodeUntaggedGeoValue( 2384 b []byte, 2385 ) (remaining []byte, spatialObject geopb.SpatialObject, err error) { 2386 var data []byte 2387 remaining, data, err = DecodeUntaggedBytesValue(b) 2388 if err != nil { 2389 return b, geopb.SpatialObject{}, err 2390 } 2391 err = protoutil.Unmarshal(data, &spatialObject) 2392 return remaining, spatialObject, err 2393 } 2394 2395 // DecodeUntaggedDecimalValue decodes a value encoded by EncodeUntaggedDecimalValue. 2396 func DecodeUntaggedDecimalValue(b []byte) (remaining []byte, d apd.Decimal, err error) { 2397 var i uint64 2398 b, _, i, err = DecodeNonsortingStdlibUvarint(b) 2399 if err != nil { 2400 return b, apd.Decimal{}, err 2401 } 2402 d, err = DecodeNonsortingDecimal(b[:int(i)], nil) 2403 return b[int(i):], d, err 2404 } 2405 2406 // DecodeIntoUntaggedDecimalValue is like DecodeUntaggedDecimalValue except it 2407 // writes the new Decimal into the input apd.Decimal pointer, which must be 2408 // non-nil. 2409 func DecodeIntoUntaggedDecimalValue(d *apd.Decimal, b []byte) (remaining []byte, err error) { 2410 var i uint64 2411 b, _, i, err = DecodeNonsortingStdlibUvarint(b) 2412 if err != nil { 2413 return b, err 2414 } 2415 err = DecodeIntoNonsortingDecimal(d, b[:int(i)], nil) 2416 return b[int(i):], err 2417 } 2418 2419 // DecodeDurationValue decodes a value encoded by EncodeUntaggedDurationValue. 2420 func DecodeDurationValue(b []byte) (remaining []byte, d duration.Duration, err error) { 2421 b, err = decodeValueTypeAssert(b, Duration) 2422 if err != nil { 2423 return b, duration.Duration{}, err 2424 } 2425 return DecodeUntaggedDurationValue(b) 2426 } 2427 2428 // DecodeUntaggedDurationValue decodes a value encoded by EncodeUntaggedDurationValue. 
2429 func DecodeUntaggedDurationValue(b []byte) (remaining []byte, d duration.Duration, err error) { 2430 var months, days, nanos int64 2431 b, _, months, err = DecodeNonsortingStdlibVarint(b) 2432 if err != nil { 2433 return b, duration.Duration{}, err 2434 } 2435 b, _, days, err = DecodeNonsortingStdlibVarint(b) 2436 if err != nil { 2437 return b, duration.Duration{}, err 2438 } 2439 b, _, nanos, err = DecodeNonsortingStdlibVarint(b) 2440 if err != nil { 2441 return b, duration.Duration{}, err 2442 } 2443 return b, duration.DecodeDuration(months, days, nanos), nil 2444 } 2445 2446 // DecodeBitArrayValue decodes a value encoded by EncodeUntaggedBitArrayValue. 2447 func DecodeBitArrayValue(b []byte) (remaining []byte, d bitarray.BitArray, err error) { 2448 b, err = decodeValueTypeAssert(b, BitArray) 2449 if err != nil { 2450 return b, bitarray.BitArray{}, err 2451 } 2452 return DecodeUntaggedBitArrayValue(b) 2453 } 2454 2455 // DecodeUntaggedBitArrayValue decodes a value encoded by EncodeUntaggedBitArrayValue. 2456 func DecodeUntaggedBitArrayValue(b []byte) (remaining []byte, d bitarray.BitArray, err error) { 2457 var bitLen uint64 2458 b, _, bitLen, err = DecodeNonsortingUvarint(b) 2459 if err != nil { 2460 return b, bitarray.BitArray{}, err 2461 } 2462 words, lastBitsUsed := bitarray.EncodingPartsForBitLen(uint(bitLen)) 2463 for i := range words { 2464 var val uint64 2465 b, val, err = DecodeUint64Ascending(b) 2466 if err != nil { 2467 return b, bitarray.BitArray{}, err 2468 } 2469 words[i] = val 2470 } 2471 ba, err := bitarray.FromEncodingParts(words, lastBitsUsed) 2472 return b, ba, err 2473 } 2474 2475 const uuidValueEncodedLength = 16 2476 2477 var _ [uuidValueEncodedLength]byte = uuid.UUID{} // Assert that uuid.UUID is length 16. 2478 2479 // DecodeUUIDValue decodes a value encoded by EncodeUUIDValue. 
2480 func DecodeUUIDValue(b []byte) (remaining []byte, u uuid.UUID, err error) { 2481 b, err = decodeValueTypeAssert(b, UUID) 2482 if err != nil { 2483 return b, u, err 2484 } 2485 return DecodeUntaggedUUIDValue(b) 2486 } 2487 2488 // DecodeUntaggedUUIDValue decodes a value encoded by EncodeUntaggedUUIDValue. 2489 func DecodeUntaggedUUIDValue(b []byte) (remaining []byte, u uuid.UUID, err error) { 2490 u, err = uuid.FromBytes(b[:uuidValueEncodedLength]) 2491 if err != nil { 2492 return b, uuid.UUID{}, err 2493 } 2494 return b[uuidValueEncodedLength:], u, nil 2495 } 2496 2497 // DecodeIPAddrValue decodes a value encoded by EncodeIPAddrValue. 2498 func DecodeIPAddrValue(b []byte) (remaining []byte, u ipaddr.IPAddr, err error) { 2499 b, err = decodeValueTypeAssert(b, IPAddr) 2500 if err != nil { 2501 return b, u, err 2502 } 2503 return DecodeUntaggedIPAddrValue(b) 2504 } 2505 2506 // DecodeUntaggedIPAddrValue decodes a value encoded by EncodeUntaggedIPAddrValue. 2507 func DecodeUntaggedIPAddrValue(b []byte) (remaining []byte, u ipaddr.IPAddr, err error) { 2508 remaining, err = u.FromBuffer(b) 2509 return remaining, u, err 2510 } 2511 2512 func decodeValueTypeAssert(b []byte, expected Type) ([]byte, error) { 2513 _, dataOffset, _, typ, err := DecodeValueTag(b) 2514 if err != nil { 2515 return b, err 2516 } 2517 b = b[dataOffset:] 2518 if typ != expected { 2519 return b, errors.Errorf("value type is not %s: %s", expected, typ) 2520 } 2521 return b, nil 2522 } 2523 2524 // PeekValueLength returns the length of the encoded value at the start of b. 2525 // Note: If this function succeeds, it's not a guarantee that decoding the value 2526 // will succeed. 2527 // 2528 // `b` can point either at beginning of the "full tag" with the column id, or it 2529 // can point to the beginning of the type part of the tag, as indicated by the 2530 // `typeOffset` returned by this or DecodeValueTag. 
2531 // 2532 // The length returned is the full length of the encoded value, including the 2533 // entire tag. 2534 func PeekValueLength(b []byte) (typeOffset int, length int, err error) { 2535 if len(b) == 0 { 2536 return 0, 0, nil 2537 } 2538 var dataOffset int 2539 var typ Type 2540 typeOffset, dataOffset, _, typ, err = DecodeValueTag(b) 2541 if err != nil { 2542 return 0, 0, err 2543 } 2544 length, err = PeekValueLengthWithOffsetsAndType(b, dataOffset, typ) 2545 return typeOffset, length, err 2546 } 2547 2548 // PeekValueLengthWithOffsetsAndType is the same as PeekValueLength, except it 2549 // expects a dataOffset and typ value from a previous call to DecodeValueTag 2550 // on its input byte slice. Use this if you've already called DecodeValueTag 2551 // on the input for another reason, to avoid it getting called twice. 2552 func PeekValueLengthWithOffsetsAndType(b []byte, dataOffset int, typ Type) (length int, err error) { 2553 b = b[dataOffset:] 2554 switch typ { 2555 case Null: 2556 return dataOffset, nil 2557 case True, False: 2558 return dataOffset, nil 2559 case Int: 2560 _, n, _, err := DecodeNonsortingStdlibVarint(b) 2561 return dataOffset + n, err 2562 case Float: 2563 return dataOffset + floatValueEncodedLength, nil 2564 case Bytes, Array, JSON, Geo: 2565 _, n, i, err := DecodeNonsortingUvarint(b) 2566 return dataOffset + n + int(i), err 2567 case BitArray: 2568 _, n, bitLen, err := DecodeNonsortingUvarint(b) 2569 if err != nil { 2570 return 0, err 2571 } 2572 numWords, _ := bitarray.SizesForBitLen(uint(bitLen)) 2573 return dataOffset + n + int(numWords)*8, err 2574 case Tuple: 2575 rem, l, numTuples, err := DecodeNonsortingUvarint(b) 2576 if err != nil { 2577 return 0, errors.Wrapf(err, "cannot decode tuple header: ") 2578 } 2579 for i := 0; i < int(numTuples); i++ { 2580 _, entryLen, err := PeekValueLength(rem) 2581 if err != nil { 2582 return 0, errors.Wrapf(err, "cannot peek tuple entry %d", i) 2583 } 2584 l += entryLen 2585 rem = rem[entryLen:] 
2586 } 2587 return dataOffset + l, nil 2588 case Decimal: 2589 _, n, i, err := DecodeNonsortingStdlibUvarint(b) 2590 return dataOffset + n + int(i), err 2591 case Time, TimeTZ: 2592 n, err := getMultiNonsortingVarintLen(b, 2) 2593 return dataOffset + n, err 2594 case Duration: 2595 n, err := getMultiNonsortingVarintLen(b, 3) 2596 return dataOffset + n, err 2597 case UUID: 2598 return dataOffset + uuidValueEncodedLength, err 2599 case IPAddr: 2600 family := ipaddr.IPFamily(b[0]) 2601 if family == ipaddr.IPv4family { 2602 return dataOffset + ipaddr.IPv4size, err 2603 } else if family == ipaddr.IPv6family { 2604 return dataOffset + ipaddr.IPv6size, err 2605 } 2606 return 0, errors.Errorf("got invalid INET IP family: %d", family) 2607 default: 2608 return 0, errors.Errorf("unknown type %s", typ) 2609 } 2610 } 2611 2612 // PrintableBytes returns true iff the given byte array is a valid 2613 // UTF-8 sequence and it is printable. 2614 func PrintableBytes(b []byte) bool { 2615 return len(bytes.TrimLeftFunc(b, isValidAndPrintableRune)) == 0 2616 } 2617 2618 func isValidAndPrintableRune(r rune) bool { 2619 return r != utf8.RuneError && unicode.IsPrint(r) 2620 } 2621 2622 // PrettyPrintValueEncoded returns a string representation of the first 2623 // decodable value in the provided byte slice, along with the remaining byte 2624 // slice after decoding. 
2625 func PrettyPrintValueEncoded(b []byte) ([]byte, string, error) { 2626 _, dataOffset, _, typ, err := DecodeValueTag(b) 2627 if err != nil { 2628 return b, "", err 2629 } 2630 switch typ { 2631 case Null: 2632 b = b[dataOffset:] 2633 return b, "NULL", nil 2634 case True: 2635 b = b[dataOffset:] 2636 return b, "true", nil 2637 case False: 2638 b = b[dataOffset:] 2639 return b, "false", nil 2640 case Int: 2641 var i int64 2642 b, i, err = DecodeIntValue(b) 2643 if err != nil { 2644 return b, "", err 2645 } 2646 return b, strconv.FormatInt(i, 10), nil 2647 case Float: 2648 var f float64 2649 b, f, err = DecodeFloatValue(b) 2650 if err != nil { 2651 return b, "", err 2652 } 2653 return b, strconv.FormatFloat(f, 'g', -1, 64), nil 2654 case Decimal: 2655 var d apd.Decimal 2656 b, d, err = DecodeDecimalValue(b) 2657 if err != nil { 2658 return b, "", err 2659 } 2660 return b, d.String(), nil 2661 case Bytes: 2662 var data []byte 2663 b, data, err = DecodeBytesValue(b) 2664 if err != nil { 2665 return b, "", err 2666 } 2667 if PrintableBytes(data) { 2668 return b, string(data), nil 2669 } 2670 // The following code extends hex.EncodeToString(). 
2671 dst := make([]byte, 2+hex.EncodedLen(len(data))) 2672 dst[0], dst[1] = '0', 'x' 2673 hex.Encode(dst[2:], data) 2674 return b, string(dst), nil 2675 case Time: 2676 var t time.Time 2677 b, t, err = DecodeTimeValue(b) 2678 if err != nil { 2679 return b, "", err 2680 } 2681 return b, t.UTC().Format(time.RFC3339Nano), nil 2682 case TimeTZ: 2683 var t timetz.TimeTZ 2684 b, t, err = DecodeTimeTZValue(b) 2685 if err != nil { 2686 return b, "", err 2687 } 2688 return b, t.String(), nil 2689 case Duration: 2690 var d duration.Duration 2691 b, d, err = DecodeDurationValue(b) 2692 if err != nil { 2693 return b, "", err 2694 } 2695 return b, d.StringNanos(), nil 2696 case BitArray: 2697 var d bitarray.BitArray 2698 b, d, err = DecodeBitArrayValue(b) 2699 if err != nil { 2700 return b, "", err 2701 } 2702 return b, "B" + d.String(), nil 2703 case UUID: 2704 var u uuid.UUID 2705 b, u, err = DecodeUUIDValue(b) 2706 if err != nil { 2707 return b, "", err 2708 } 2709 return b, u.String(), nil 2710 case IPAddr: 2711 var ipAddr ipaddr.IPAddr 2712 b, ipAddr, err = DecodeIPAddrValue(b) 2713 if err != nil { 2714 return b, "", err 2715 } 2716 return b, ipAddr.String(), nil 2717 default: 2718 return b, "", errors.Errorf("unknown type %s", typ) 2719 } 2720 } 2721 2722 // DecomposeKeyTokens breaks apart a key into its individual key-encoded values 2723 // and returns a slice of byte slices, one for each key-encoded value. 2724 // It also returns whether the key contains a NULL value. 
2725 func DecomposeKeyTokens(b []byte) (tokens [][]byte, containsNull bool, err error) { 2726 var out [][]byte 2727 2728 for len(b) > 0 { 2729 tokenLen, err := PeekLength(b) 2730 if err != nil { 2731 return nil, false, err 2732 } 2733 2734 if PeekType(b) == Null { 2735 containsNull = true 2736 } 2737 2738 out = append(out, b[:tokenLen]) 2739 b = b[tokenLen:] 2740 } 2741 2742 return out, containsNull, nil 2743 } 2744 2745 // getInvertedIndexKeyLength finds the length of an inverted index key 2746 // encoded as a byte array. 2747 func getInvertedIndexKeyLength(b []byte) (int, error) { 2748 skipped := 0 2749 for { 2750 i := bytes.IndexByte(b[skipped:], escape) 2751 if i == -1 { 2752 return 0, errors.Errorf("malformed inverted index key in buffer %#x", b) 2753 } 2754 skipped += i + escapeLength 2755 switch b[skipped-1] { 2756 case escapedTerm, jsonEmptyObject, jsonEmptyArray: 2757 return skipped, nil 2758 } 2759 } 2760 } 2761 2762 // getJSONInvertedIndexKeyLength returns the length of encoded JSON inverted index 2763 // key at the start of b. 2764 func getJSONInvertedIndexKeyLength(buf []byte) (int, error) { 2765 len, err := getInvertedIndexKeyLength(buf) 2766 if err != nil { 2767 return 0, err 2768 } 2769 2770 switch buf[len] { 2771 case jsonEmptyArray, jsonEmptyObject: 2772 return len + 1, nil 2773 2774 default: 2775 valLen, err := PeekLength(buf[len:]) 2776 if err != nil { 2777 return 0, err 2778 } 2779 2780 return len + valLen, nil 2781 } 2782 } 2783 2784 // EncodeArrayKeyMarker adds the array key encoding marker to buf and 2785 // returns the new buffer. 2786 func EncodeArrayKeyMarker(buf []byte, dir Direction) []byte { 2787 switch dir { 2788 case Ascending: 2789 return append(buf, arrayKeyMarker) 2790 case Descending: 2791 return append(buf, arrayKeyDescendingMarker) 2792 default: 2793 panic("invalid direction") 2794 } 2795 } 2796 2797 // EncodeArrayKeyTerminator adds the array key terminator to buf and 2798 // returns the new buffer. 
2799 func EncodeArrayKeyTerminator(buf []byte, dir Direction) []byte { 2800 switch dir { 2801 case Ascending: 2802 return append(buf, arrayKeyTerminator) 2803 case Descending: 2804 return append(buf, arrayKeyDescendingTerminator) 2805 default: 2806 panic("invalid direction") 2807 } 2808 } 2809 2810 // EncodeNullWithinArrayKey encodes NULL within a key encoded array. 2811 func EncodeNullWithinArrayKey(buf []byte, dir Direction) []byte { 2812 switch dir { 2813 case Ascending: 2814 return append(buf, ascendingNullWithinArrayKey) 2815 case Descending: 2816 return append(buf, descendingNullWithinArrayKey) 2817 default: 2818 panic("invalid direction") 2819 } 2820 } 2821 2822 // IsNextByteArrayEncodedNull returns if the first byte in the input 2823 // is the NULL encoded byte within an array key. 2824 func IsNextByteArrayEncodedNull(buf []byte, dir Direction) bool { 2825 expected := ascendingNullWithinArrayKey 2826 if dir == Descending { 2827 expected = descendingNullWithinArrayKey 2828 } 2829 return buf[0] == expected 2830 } 2831 2832 // ValidateAndConsumeArrayKeyMarker checks that the marker at the front 2833 // of buf is valid for an array of the given direction, and consumes it 2834 // if so. It returns an error if the tag is invalid. 2835 func ValidateAndConsumeArrayKeyMarker(buf []byte, dir Direction) ([]byte, error) { 2836 typ := PeekType(buf) 2837 expected := ArrayKeyAsc 2838 if dir == Descending { 2839 expected = ArrayKeyDesc 2840 } 2841 if typ != expected { 2842 return nil, errors.Newf("invalid type found %s", typ) 2843 } 2844 return buf[1:], nil 2845 } 2846 2847 // IsArrayKeyDone returns if the first byte in the input is the array 2848 // terminator for the input direction. 2849 func IsArrayKeyDone(buf []byte, dir Direction) bool { 2850 expected := arrayKeyTerminator 2851 if dir == Descending { 2852 expected = arrayKeyDescendingTerminator 2853 } 2854 return buf[0] == expected 2855 }