github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sqlbase/encoded_datum.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sqlbase 12 13 import ( 14 "bytes" 15 "fmt" 16 "unsafe" 17 18 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 19 "github.com/cockroachdb/cockroach/pkg/sql/types" 20 "github.com/cockroachdb/cockroach/pkg/util/encoding" 21 "github.com/cockroachdb/cockroach/pkg/util/log" 22 "github.com/cockroachdb/errors" 23 ) 24 25 // EncodingDirToDatumEncoding returns an equivalent DatumEncoding for the given 26 // encoding direction. 27 func EncodingDirToDatumEncoding(dir encoding.Direction) DatumEncoding { 28 switch dir { 29 case encoding.Ascending: 30 return DatumEncoding_ASCENDING_KEY 31 case encoding.Descending: 32 return DatumEncoding_DESCENDING_KEY 33 default: 34 panic(fmt.Sprintf("invalid encoding direction: %d", dir)) 35 } 36 } 37 38 // EncDatum represents a datum that is "backed" by an encoding and/or by a 39 // tree.Datum. It allows "passing through" a Datum without decoding and 40 // reencoding. 41 type EncDatum struct { 42 // Encoding type. Valid only if encoded is not nil. 43 encoding DatumEncoding 44 45 // Encoded datum (according to the encoding field). 46 encoded []byte 47 48 // Decoded datum. 49 Datum tree.Datum 50 } 51 52 func (ed *EncDatum) stringWithAlloc(typ *types.T, a *DatumAlloc) string { 53 if ed.Datum == nil { 54 if ed.encoded == nil { 55 return "<unset>" 56 } 57 if a == nil { 58 a = &DatumAlloc{} 59 } 60 err := ed.EnsureDecoded(typ, a) 61 if err != nil { 62 return fmt.Sprintf("<error: %v>", err) 63 } 64 } 65 return ed.Datum.String() 66 } 67 68 func (ed *EncDatum) String(typ *types.T) string { 69 return ed.stringWithAlloc(typ, nil) 70 } 71 72 // BytesEqual is true if the EncDatum's encoded field is equal to the input. 73 func (ed *EncDatum) BytesEqual(b []byte) bool { 74 return bytes.Equal(ed.encoded, b) 75 } 76 77 // EncodedString returns an immutable copy of this EncDatum's encoded field. 78 func (ed *EncDatum) EncodedString() string { 79 return string(ed.encoded) 80 } 81 82 // EncodedBytes returns this EncDatum's encoded field. This should be rarely 83 // used, and the caller must not modify the returned slice. 84 func (ed *EncDatum) EncodedBytes() []byte { 85 return ed.encoded 86 } 87 88 // EncDatumOverhead is the overhead of EncDatum in bytes. 89 const EncDatumOverhead = unsafe.Sizeof(EncDatum{}) 90 91 // Size returns a lower bound on the total size of the receiver in bytes, 92 // including memory referenced by the receiver. 93 func (ed EncDatum) Size() uintptr { 94 size := EncDatumOverhead 95 if ed.encoded != nil { 96 size += uintptr(len(ed.encoded)) 97 } 98 if ed.Datum != nil { 99 size += ed.Datum.Size() 100 } 101 return size 102 } 103 104 // EncDatumFromEncoded initializes an EncDatum with the given encoded 105 // value. The encoded value is stored as a shallow copy, so the caller must 106 // make sure the slice is not modified for the lifetime of the EncDatum. 107 // The underlying Datum is nil. 108 func EncDatumFromEncoded(enc DatumEncoding, encoded []byte) EncDatum { 109 if len(encoded) == 0 { 110 panic(fmt.Sprintf("empty encoded value")) 111 } 112 return EncDatum{ 113 encoding: enc, 114 encoded: encoded, 115 Datum: nil, 116 } 117 } 118 119 // EncDatumFromBuffer initializes an EncDatum with an encoding that is 120 // possibly followed by other data. Similar to EncDatumFromEncoded, 121 // except that this function figures out where the encoding stops and returns a 122 // slice for the rest of the buffer. 123 func EncDatumFromBuffer(typ *types.T, enc DatumEncoding, buf []byte) (EncDatum, []byte, error) { 124 if len(buf) == 0 { 125 return EncDatum{}, nil, errors.New("empty encoded value") 126 } 127 switch enc { 128 case DatumEncoding_ASCENDING_KEY, DatumEncoding_DESCENDING_KEY: 129 var encLen int 130 var err error 131 encLen, err = encoding.PeekLength(buf) 132 if err != nil { 133 return EncDatum{}, nil, err 134 } 135 ed := EncDatumFromEncoded(enc, buf[:encLen]) 136 return ed, buf[encLen:], nil 137 case DatumEncoding_VALUE: 138 typeOffset, encLen, err := encoding.PeekValueLength(buf) 139 if err != nil { 140 return EncDatum{}, nil, err 141 } 142 ed := EncDatumFromEncoded(enc, buf[typeOffset:encLen]) 143 return ed, buf[encLen:], nil 144 default: 145 panic(fmt.Sprintf("unknown encoding %s", enc)) 146 } 147 } 148 149 // EncDatumValueFromBufferWithOffsetsAndType is just like calling 150 // EncDatumFromBuffer with DatumEncoding_VALUE, except it expects that you pass 151 // in the result of calling DecodeValueTag on the input buf. Use this if you've 152 // already called DecodeValueTag on buf already, to avoid it getting called 153 // more than necessary. 154 func EncDatumValueFromBufferWithOffsetsAndType( 155 buf []byte, typeOffset int, dataOffset int, typ encoding.Type, 156 ) (EncDatum, []byte, error) { 157 encLen, err := encoding.PeekValueLengthWithOffsetsAndType(buf, dataOffset, typ) 158 if err != nil { 159 return EncDatum{}, nil, err 160 } 161 ed := EncDatumFromEncoded(DatumEncoding_VALUE, buf[typeOffset:encLen]) 162 return ed, buf[encLen:], nil 163 } 164 165 // DatumToEncDatum initializes an EncDatum with the given Datum. 166 func DatumToEncDatum(ctyp *types.T, d tree.Datum) EncDatum { 167 if d == nil { 168 panic("Cannot convert nil datum to EncDatum") 169 } 170 171 dTyp := d.ResolvedType() 172 if d != tree.DNull && !ctyp.Equivalent(dTyp) && !dTyp.IsAmbiguous() { 173 panic(fmt.Sprintf("invalid datum type given: %s, expected %s", dTyp, ctyp)) 174 } 175 return EncDatum{Datum: d} 176 } 177 178 // UnsetDatum ensures subsequent IsUnset() calls return false. 179 func (ed *EncDatum) UnsetDatum() { 180 ed.encoded = nil 181 ed.Datum = nil 182 ed.encoding = 0 183 } 184 185 // IsUnset returns true if EncDatumFromEncoded or DatumToEncDatum were not called. 186 func (ed *EncDatum) IsUnset() bool { 187 return ed.encoded == nil && ed.Datum == nil 188 } 189 190 // IsNull returns true if the EncDatum value is NULL. Equivalent to checking if 191 // ed.Datum is DNull after calling EnsureDecoded. 192 func (ed *EncDatum) IsNull() bool { 193 if ed.Datum != nil { 194 return ed.Datum == tree.DNull 195 } 196 if ed.encoded == nil { 197 panic("IsNull on unset EncDatum") 198 } 199 switch ed.encoding { 200 case DatumEncoding_ASCENDING_KEY, DatumEncoding_DESCENDING_KEY: 201 _, isNull := encoding.DecodeIfNull(ed.encoded) 202 return isNull 203 204 case DatumEncoding_VALUE: 205 _, _, _, typ, err := encoding.DecodeValueTag(ed.encoded) 206 if err != nil { 207 panic(err) 208 } 209 return typ == encoding.Null 210 211 default: 212 panic(fmt.Sprintf("unknown encoding %s", ed.encoding)) 213 } 214 } 215 216 // EnsureDecoded ensures that the Datum field is set (decoding if it is not). 217 func (ed *EncDatum) EnsureDecoded(typ *types.T, a *DatumAlloc) error { 218 if ed.Datum != nil { 219 return nil 220 } 221 if ed.encoded == nil { 222 return errors.AssertionFailedf("decoding unset EncDatum") 223 } 224 var err error 225 var rem []byte 226 switch ed.encoding { 227 case DatumEncoding_ASCENDING_KEY: 228 ed.Datum, rem, err = DecodeTableKey(a, typ, ed.encoded, encoding.Ascending) 229 case DatumEncoding_DESCENDING_KEY: 230 ed.Datum, rem, err = DecodeTableKey(a, typ, ed.encoded, encoding.Descending) 231 case DatumEncoding_VALUE: 232 ed.Datum, rem, err = DecodeTableValue(a, typ, ed.encoded) 233 default: 234 return errors.AssertionFailedf("unknown encoding %d", log.Safe(ed.encoding)) 235 } 236 if err != nil { 237 return errors.Wrapf(err, "error decoding %d bytes", log.Safe(len(ed.encoded))) 238 } 239 if len(rem) != 0 { 240 ed.Datum = nil 241 return errors.AssertionFailedf( 242 "%d trailing bytes in encoded value: %+v", log.Safe(len(rem)), rem) 243 } 244 return nil 245 } 246 247 // Encoding returns the encoding that is already available (the latter indicated 248 // by the bool return value). 249 func (ed *EncDatum) Encoding() (DatumEncoding, bool) { 250 if ed.encoded == nil { 251 return 0, false 252 } 253 return ed.encoding, true 254 } 255 256 // Encode appends the encoded datum to the given slice using the requested 257 // encoding. 258 // Note: DatumEncoding_VALUE encodings are not unique because they can contain 259 // a column ID so they should not be used to test for equality. 260 func (ed *EncDatum) Encode( 261 typ *types.T, a *DatumAlloc, enc DatumEncoding, appendTo []byte, 262 ) ([]byte, error) { 263 if ed.encoded != nil && enc == ed.encoding { 264 // We already have an encoding that matches that we can use. 265 return append(appendTo, ed.encoded...), nil 266 } 267 if err := ed.EnsureDecoded(typ, a); err != nil { 268 return nil, err 269 } 270 switch enc { 271 case DatumEncoding_ASCENDING_KEY: 272 return EncodeTableKey(appendTo, ed.Datum, encoding.Ascending) 273 case DatumEncoding_DESCENDING_KEY: 274 return EncodeTableKey(appendTo, ed.Datum, encoding.Descending) 275 case DatumEncoding_VALUE: 276 return EncodeTableValue(appendTo, ColumnID(encoding.NoColumnID), ed.Datum, a.scratch) 277 default: 278 panic(fmt.Sprintf("unknown encoding requested %s", enc)) 279 } 280 } 281 282 // Fingerprint appends a unique hash of ed to the given slice. If datums are intended 283 // to be deduplicated or grouped with hashes, this function should be used 284 // instead of encode. Additionally, Fingerprint has the property that if the 285 // fingerprints of a set of datums are appended together, the resulting 286 // fingerprint will uniquely identify the set. 287 func (ed *EncDatum) Fingerprint(typ *types.T, a *DatumAlloc, appendTo []byte) ([]byte, error) { 288 // Note: we don't ed.EnsureDecoded on top of this method, because the default 289 // case uses ed.Encode, which has a fast path if the encoded bytes are already 290 // the right encoding. 291 switch typ.Family() { 292 case types.JsonFamily: 293 if err := ed.EnsureDecoded(typ, a); err != nil { 294 return nil, err 295 } 296 // We must use value encodings without a column ID even if the EncDatum already 297 // is encoded with the value encoding so that the hashes are indeed unique. 298 return EncodeTableValue(appendTo, ColumnID(encoding.NoColumnID), ed.Datum, a.scratch) 299 default: 300 // For values that are key encodable, using the ascending key. 301 // TODO (rohany): However, there should be a knob for the hasher that sees 302 // what kind of encoding already exists on the enc datums incoming to the 303 // DistSQL operators, and should use that encoding to avoid re-encoding 304 // datums into different encoding types as much as possible. 305 return ed.Encode(typ, a, DatumEncoding_ASCENDING_KEY, appendTo) 306 } 307 } 308 309 // Compare returns: 310 // -1 if the receiver is less than rhs, 311 // 0 if the receiver is equal to rhs, 312 // +1 if the receiver is greater than rhs. 313 func (ed *EncDatum) Compare( 314 typ *types.T, a *DatumAlloc, evalCtx *tree.EvalContext, rhs *EncDatum, 315 ) (int, error) { 316 // TODO(radu): if we have both the Datum and a key encoding available, which 317 // one would be faster to use? 318 if ed.encoding == rhs.encoding && ed.encoded != nil && rhs.encoded != nil { 319 switch ed.encoding { 320 case DatumEncoding_ASCENDING_KEY: 321 return bytes.Compare(ed.encoded, rhs.encoded), nil 322 case DatumEncoding_DESCENDING_KEY: 323 return bytes.Compare(rhs.encoded, ed.encoded), nil 324 } 325 } 326 if err := ed.EnsureDecoded(typ, a); err != nil { 327 return 0, err 328 } 329 if err := rhs.EnsureDecoded(typ, a); err != nil { 330 return 0, err 331 } 332 return ed.Datum.Compare(evalCtx, rhs.Datum), nil 333 } 334 335 // GetInt decodes an EncDatum that is known to be of integer type and returns 336 // the integer value. It is a more convenient and more efficient alternative to 337 // calling EnsureDecoded and casting the Datum. 338 func (ed *EncDatum) GetInt() (int64, error) { 339 if ed.Datum != nil { 340 if ed.Datum == tree.DNull { 341 return 0, errors.Errorf("NULL INT value") 342 } 343 return int64(*ed.Datum.(*tree.DInt)), nil 344 } 345 346 switch ed.encoding { 347 case DatumEncoding_ASCENDING_KEY: 348 if _, isNull := encoding.DecodeIfNull(ed.encoded); isNull { 349 return 0, errors.Errorf("NULL INT value") 350 } 351 _, val, err := encoding.DecodeVarintAscending(ed.encoded) 352 return val, err 353 354 case DatumEncoding_DESCENDING_KEY: 355 if _, isNull := encoding.DecodeIfNull(ed.encoded); isNull { 356 return 0, errors.Errorf("NULL INT value") 357 } 358 _, val, err := encoding.DecodeVarintDescending(ed.encoded) 359 return val, err 360 361 case DatumEncoding_VALUE: 362 _, dataOffset, _, typ, err := encoding.DecodeValueTag(ed.encoded) 363 if err != nil { 364 return 0, err 365 } 366 // NULL, true, and false are special, because their values are fully encoded by their value tag. 367 if typ == encoding.Null { 368 return 0, errors.Errorf("NULL INT value") 369 } 370 371 _, val, err := encoding.DecodeUntaggedIntValue(ed.encoded[dataOffset:]) 372 return val, err 373 374 default: 375 return 0, errors.Errorf("unknown encoding %s", ed.encoding) 376 } 377 } 378 379 // EncDatumRow is a row of EncDatums. 380 type EncDatumRow []EncDatum 381 382 func (r EncDatumRow) stringToBuf(types []*types.T, a *DatumAlloc, b *bytes.Buffer) { 383 if len(types) != len(r) { 384 panic(fmt.Sprintf("mismatched types (%v) and row (%v)", types, r)) 385 } 386 b.WriteString("[") 387 for i := range r { 388 if i > 0 { 389 b.WriteString(" ") 390 } 391 b.WriteString(r[i].stringWithAlloc(types[i], a)) 392 } 393 b.WriteString("]") 394 } 395 396 // Copy makes a copy of this EncDatumRow. Convenient for tests. Use an 397 // EncDatumRowAlloc in non-test code. 398 func (r EncDatumRow) Copy() EncDatumRow { 399 if r == nil { 400 return nil 401 } 402 rCopy := make(EncDatumRow, len(r)) 403 copy(rCopy, r) 404 return rCopy 405 } 406 407 func (r EncDatumRow) String(types []*types.T) string { 408 var b bytes.Buffer 409 r.stringToBuf(types, &DatumAlloc{}, &b) 410 return b.String() 411 } 412 413 // EncDatumRowOverhead is the overhead of EncDatumRow in bytes. 414 const EncDatumRowOverhead = unsafe.Sizeof(EncDatumRow{}) 415 416 // Size returns a lower bound on the total size all EncDatum's in the receiver, 417 // including memory referenced by all EncDatum's. 418 func (r EncDatumRow) Size() uintptr { 419 size := EncDatumRowOverhead 420 for _, ed := range r { 421 size += ed.Size() 422 } 423 return size 424 } 425 426 // EncDatumRowToDatums converts a given EncDatumRow to a Datums. 427 func EncDatumRowToDatums( 428 types []*types.T, datums tree.Datums, row EncDatumRow, da *DatumAlloc, 429 ) error { 430 if len(types) != len(row) { 431 panic(fmt.Sprintf("mismatched types (%v) and row (%v)", types, row)) 432 } 433 if len(row) != len(datums) { 434 return errors.Errorf( 435 "Length mismatch (%d and %d) between datums and row", len(datums), len(row)) 436 } 437 for i, encDatum := range row { 438 if encDatum.IsUnset() { 439 datums[i] = tree.DNull 440 continue 441 } 442 err := encDatum.EnsureDecoded(types[i], da) 443 if err != nil { 444 return err 445 } 446 datums[i] = encDatum.Datum 447 } 448 return nil 449 } 450 451 // Compare returns the relative ordering of two EncDatumRows according to a 452 // ColumnOrdering: 453 // -1 if the receiver comes before the rhs in the ordering, 454 // +1 if the receiver comes after the rhs in the ordering, 455 // 0 if the relative order does not matter (i.e. the two rows have the same 456 // values for the columns in the ordering). 457 // 458 // Note that a return value of 0 does not (in general) imply that the rows are 459 // equal; for example, rows [1 1 5] and [1 1 6] when compared against ordering 460 // {{0, asc}, {1, asc}} (i.e. ordered by first column and then by second 461 // column). 462 func (r EncDatumRow) Compare( 463 types []*types.T, 464 a *DatumAlloc, 465 ordering ColumnOrdering, 466 evalCtx *tree.EvalContext, 467 rhs EncDatumRow, 468 ) (int, error) { 469 if len(r) != len(types) || len(rhs) != len(types) { 470 panic(fmt.Sprintf("length mismatch: %d types, %d lhs, %d rhs\n%+v\n%+v\n%+v", len(types), len(r), len(rhs), types, r, rhs)) 471 } 472 for _, c := range ordering { 473 cmp, err := r[c.ColIdx].Compare(types[c.ColIdx], a, evalCtx, &rhs[c.ColIdx]) 474 if err != nil { 475 return 0, err 476 } 477 if cmp != 0 { 478 if c.Direction == encoding.Descending { 479 cmp = -cmp 480 } 481 return cmp, nil 482 } 483 } 484 return 0, nil 485 } 486 487 // CompareToDatums is a version of Compare which compares against decoded Datums. 488 func (r EncDatumRow) CompareToDatums( 489 types []*types.T, 490 a *DatumAlloc, 491 ordering ColumnOrdering, 492 evalCtx *tree.EvalContext, 493 rhs tree.Datums, 494 ) (int, error) { 495 for _, c := range ordering { 496 if err := r[c.ColIdx].EnsureDecoded(types[c.ColIdx], a); err != nil { 497 return 0, err 498 } 499 cmp := r[c.ColIdx].Datum.Compare(evalCtx, rhs[c.ColIdx]) 500 if cmp != 0 { 501 if c.Direction == encoding.Descending { 502 cmp = -cmp 503 } 504 return cmp, nil 505 } 506 } 507 return 0, nil 508 } 509 510 // EncDatumRows is a slice of EncDatumRows having the same schema. 511 type EncDatumRows []EncDatumRow 512 513 func (r EncDatumRows) String(types []*types.T) string { 514 var a DatumAlloc 515 var b bytes.Buffer 516 b.WriteString("[") 517 for i, r := range r { 518 if i > 0 { 519 b.WriteString(" ") 520 } 521 r.stringToBuf(types, &a, &b) 522 } 523 b.WriteString("]") 524 return b.String() 525 } 526 527 // EncDatumRowContainer holds rows and can cycle through them. 528 // Must be Reset upon initialization. 529 type EncDatumRowContainer struct { 530 rows EncDatumRows 531 index int 532 } 533 534 // Peek returns the current element at the top of the container. 535 func (c *EncDatumRowContainer) Peek() EncDatumRow { 536 return c.rows[c.index] 537 } 538 539 // Pop returns the next row from the container. Will cycle through the rows 540 // again if we reach the end. 541 func (c *EncDatumRowContainer) Pop() EncDatumRow { 542 if c.index < 0 { 543 c.index = len(c.rows) - 1 544 } 545 row := c.rows[c.index] 546 c.index-- 547 return row 548 } 549 550 // Push adds a row to the container. 551 func (c *EncDatumRowContainer) Push(row EncDatumRow) { 552 c.rows = append(c.rows, row) 553 c.index = len(c.rows) - 1 554 } 555 556 // Reset clears the container and resets the indexes. 557 // Must be called upon creating a container. 558 func (c *EncDatumRowContainer) Reset() { 559 c.rows = c.rows[:0] 560 c.index = -1 561 } 562 563 // IsEmpty returns whether the container is "empty", which means that it's about 564 // to cycle through its rows again on the next Pop. 565 func (c *EncDatumRowContainer) IsEmpty() bool { 566 return c.index == -1 567 } 568 569 // EncDatumRowAlloc is a helper that speeds up allocation of EncDatumRows 570 // (preferably of the same length). 571 type EncDatumRowAlloc struct { 572 buf []EncDatum 573 // Preallocate a small initial batch (helps cases where 574 // we only allocate a few small rows). 575 prealloc [16]EncDatum 576 } 577 578 // AllocRow allocates an EncDatumRow with the given number of columns. 579 func (a *EncDatumRowAlloc) AllocRow(cols int) EncDatumRow { 580 if a.buf == nil { 581 // First call. 582 a.buf = a.prealloc[:] 583 } 584 if len(a.buf) < cols { 585 // If the rows are small, allocate storage for a bunch of rows at once. 586 bufLen := cols 587 if cols <= 16 { 588 bufLen *= 16 589 } else if cols <= 64 { 590 bufLen *= 4 591 } 592 a.buf = make([]EncDatum, bufLen) 593 } 594 // Chop off a row from buf, and limit its capacity to avoid corrupting the 595 // following row in the unlikely case that the caller appends to the slice. 596 result := EncDatumRow(a.buf[:cols:cols]) 597 a.buf = a.buf[cols:] 598 return result 599 } 600 601 // CopyRow allocates an EncDatumRow and copies the given row to it. 602 func (a *EncDatumRowAlloc) CopyRow(row EncDatumRow) EncDatumRow { 603 rowCopy := a.AllocRow(len(row)) 604 copy(rowCopy, row) 605 return rowCopy 606 }