github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/props/histogram.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package props 12 13 import ( 14 "bytes" 15 "fmt" 16 "io" 17 "math" 18 "sort" 19 20 "github.com/cockroachdb/cockroach/pkg/sql/opt" 21 "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" 22 "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" 23 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 24 "github.com/cockroachdb/cockroach/pkg/sql/types" 25 "github.com/cockroachdb/errors" 26 "github.com/olekukonko/tablewriter" 27 ) 28 29 // Histogram captures the distribution of values for a particular column within 30 // a relational expression. 31 // Histograms are immutable. 32 type Histogram struct { 33 evalCtx *tree.EvalContext 34 col opt.ColumnID 35 buckets []cat.HistogramBucket 36 } 37 38 func (h *Histogram) String() string { 39 w := histogramWriter{} 40 w.init(h.buckets) 41 var buf bytes.Buffer 42 w.write(&buf) 43 return buf.String() 44 } 45 46 // Init initializes the histogram with data from the catalog. 47 func (h *Histogram) Init( 48 evalCtx *tree.EvalContext, col opt.ColumnID, buckets []cat.HistogramBucket, 49 ) { 50 h.evalCtx = evalCtx 51 h.col = col 52 h.buckets = buckets 53 } 54 55 // copy returns a deep copy of the histogram. 56 func (h *Histogram) copy() *Histogram { 57 buckets := make([]cat.HistogramBucket, len(h.buckets)) 58 copy(buckets, h.buckets) 59 return &Histogram{ 60 evalCtx: h.evalCtx, 61 col: h.col, 62 buckets: buckets, 63 } 64 } 65 66 // BucketCount returns the number of buckets in the histogram. 67 func (h *Histogram) BucketCount() int { 68 return len(h.buckets) 69 } 70 71 // Bucket returns a pointer to the ith bucket in the histogram. 72 // i must be greater than or equal to 0 and less than BucketCount. 73 func (h *Histogram) Bucket(i int) *cat.HistogramBucket { 74 return &h.buckets[i] 75 } 76 77 // ValuesCount returns the total number of values in the histogram. It can 78 // be used to estimate the selectivity of a predicate by comparing the values 79 // count before and after calling Filter on the histogram. 80 func (h *Histogram) ValuesCount() float64 { 81 var count float64 82 for i := range h.buckets { 83 count += h.buckets[i].NumRange 84 count += h.buckets[i].NumEq 85 } 86 return count 87 } 88 89 // DistinctValuesCount returns the estimated number of distinct values in the 90 // histogram. 91 func (h *Histogram) DistinctValuesCount() float64 { 92 var count float64 93 for i := range h.buckets { 94 b := &h.buckets[i] 95 count += b.DistinctRange 96 if b.NumEq > 1 { 97 count++ 98 } else { 99 count += b.NumEq 100 } 101 } 102 if maxCount := h.maxDistinctValuesCount(); maxCount < count { 103 count = maxCount 104 } 105 return count 106 } 107 108 // maxDistinctValuesCount estimates the maximum number of distinct values in 109 // the histogram. 110 func (h *Histogram) maxDistinctValuesCount() float64 { 111 if len(h.buckets) == 0 { 112 return 0 113 } 114 115 // The first bucket always has a zero value for NumRange, so the lower bound 116 // of the histogram is the upper bound of the first bucket. 117 if h.Bucket(0).NumRange != 0 { 118 panic(errors.AssertionFailedf("the first bucket should have NumRange=0")) 119 } 120 lowerBound := h.Bucket(0).UpperBound 121 122 var count float64 123 for i := range h.buckets { 124 b := &h.buckets[i] 125 rng, ok := maxDistinctValuesInRange(lowerBound, b.UpperBound) 126 127 if ok && b.NumRange > rng { 128 count += rng 129 } else { 130 count += b.NumRange 131 } 132 133 if b.NumEq > 1 { 134 count++ 135 } else { 136 count += b.NumEq 137 } 138 lowerBound = h.getNextLowerBound(b.UpperBound) 139 } 140 return count 141 } 142 143 // maxDistinctValuesInRange returns the maximum number of distinct values in 144 // the range [lowerBound, upperBound). It returns ok=false when it is not 145 // possible to determine a finite value (which is the case for all types other 146 // than integers and dates). 147 func maxDistinctValuesInRange(lowerBound, upperBound tree.Datum) (_ float64, ok bool) { 148 switch lowerBound.ResolvedType().Family() { 149 case types.IntFamily: 150 return float64(*upperBound.(*tree.DInt)) - float64(*lowerBound.(*tree.DInt)), true 151 152 case types.DateFamily: 153 lower := lowerBound.(*tree.DDate) 154 upper := upperBound.(*tree.DDate) 155 if lower.IsFinite() && upper.IsFinite() { 156 return float64(upper.PGEpochDays()) - float64(lower.PGEpochDays()), true 157 } 158 return 0, false 159 160 default: 161 return 0, false 162 } 163 } 164 165 // CanFilter returns true if the given constraint can filter the histogram. 166 // This is the case if the histogram column matches one of the columns in 167 // the exact prefix of c or the next column immediately after the exact prefix. 168 // Returns the offset of the matching column in the constraint if found, as 169 // well as the exact prefix. 170 func (h *Histogram) CanFilter(c *constraint.Constraint) (colOffset, exactPrefix int, ok bool) { 171 exactPrefix = c.ExactPrefix(h.evalCtx) 172 constrainedCols := c.ConstrainedColumns(h.evalCtx) 173 for i := 0; i < constrainedCols && i <= exactPrefix; i++ { 174 if c.Columns.Get(i).ID() == h.col { 175 return i, exactPrefix, true 176 } 177 } 178 return 0, exactPrefix, false 179 } 180 181 // Filter filters the histogram according to the given constraint, and returns 182 // a new histogram with the results. CanFilter should be called first to 183 // validate that c can filter the histogram. 184 func (h *Histogram) Filter(c *constraint.Constraint) *Histogram { 185 colOffset, exactPrefix, ok := h.CanFilter(c) 186 if !ok { 187 panic(errors.AssertionFailedf("column mismatch")) 188 } 189 190 bucketCount := h.BucketCount() 191 filtered := &Histogram{ 192 evalCtx: h.evalCtx, 193 col: h.col, 194 buckets: make([]cat.HistogramBucket, 0, bucketCount), 195 } 196 if bucketCount == 0 { 197 return filtered 198 } 199 200 // The first bucket always has a zero value for NumRange, so the lower bound 201 // of the histogram is the upper bound of the first bucket. 202 if h.Bucket(0).NumRange != 0 { 203 panic(errors.AssertionFailedf("the first bucket should have NumRange=0")) 204 } 205 206 prefix := make([]tree.Datum, colOffset) 207 for i := range prefix { 208 prefix[i] = c.Spans.Get(0).StartKey().Value(i) 209 } 210 desc := c.Columns.Get(colOffset).Descending() 211 var iter histogramIter 212 iter.init(h, desc) 213 spanIndex := 0 214 keyCtx := constraint.KeyContext{EvalCtx: h.evalCtx, Columns: c.Columns} 215 216 // Find the first span that may overlap with the histogram. 217 firstBucket := makeSpanFromBucket(&iter, prefix) 218 spanCount := c.Spans.Count() 219 for spanIndex < spanCount { 220 span := c.Spans.Get(spanIndex) 221 if firstBucket.StartsAfter(&keyCtx, span) { 222 spanIndex++ 223 continue 224 } 225 break 226 } 227 if spanIndex == spanCount { 228 return filtered 229 } 230 231 // Use binary search to find the first bucket that overlaps with the span. 232 span := c.Spans.Get(spanIndex) 233 bucIndex := sort.Search(bucketCount, func(i int) bool { 234 iter.setIdx(i) 235 bucket := makeSpanFromBucket(&iter, prefix) 236 if desc { 237 return span.StartsAfter(&keyCtx, &bucket) 238 } 239 return !span.StartsAfter(&keyCtx, &bucket) 240 }) 241 if desc { 242 bucIndex-- 243 if bucIndex == -1 { 244 return filtered 245 } 246 } else if bucIndex == bucketCount { 247 return filtered 248 } 249 iter.setIdx(bucIndex) 250 if !desc && bucIndex > 0 { 251 prevUpperBound := h.Bucket(bucIndex - 1).UpperBound 252 filtered.addEmptyBucket(prevUpperBound, desc) 253 } 254 255 // For the remaining buckets and spans, use a variation on merge sort. 256 for spanIndex < spanCount { 257 if spanIndex > 0 && colOffset < exactPrefix { 258 // If this column is part of the exact prefix, we don't need to look at 259 // the rest of the spans. 260 break 261 } 262 263 // Convert the bucket to a span in order to take advantage of the 264 // constraint library. 265 left := makeSpanFromBucket(&iter, prefix) 266 right := c.Spans.Get(spanIndex) 267 268 if left.StartsAfter(&keyCtx, right) { 269 spanIndex++ 270 continue 271 } 272 273 filteredSpan := left 274 if !filteredSpan.TryIntersectWith(&keyCtx, right) { 275 filtered.addEmptyBucket(iter.b.UpperBound, desc) 276 if ok := iter.next(); !ok { 277 break 278 } 279 continue 280 } 281 282 filteredBucket := iter.b 283 if filteredSpan.Compare(&keyCtx, &left) != 0 { 284 // The bucket was cut off in the middle. Get the resulting filtered 285 // bucket. 286 filteredBucket = getFilteredBucket(&iter, &keyCtx, &filteredSpan, colOffset) 287 if !desc && filteredSpan.CompareStarts(&keyCtx, &left) != 0 { 288 // We need to add an empty bucket before the new bucket. 289 ub := h.getPrevUpperBound(filteredSpan.StartKey(), filteredSpan.StartBoundary(), colOffset) 290 filtered.addEmptyBucket(ub, desc) 291 } 292 } 293 filtered.addBucket(filteredBucket, desc) 294 295 if desc && filteredSpan.CompareEnds(&keyCtx, &left) != 0 { 296 // We need to add an empty bucket after the new bucket. 297 ub := h.getPrevUpperBound(filteredSpan.EndKey(), filteredSpan.EndBoundary(), colOffset) 298 filtered.addEmptyBucket(ub, desc) 299 } 300 301 // Skip past whichever span ends first, or skip past both if they have 302 // the same endpoint. 303 cmp := left.CompareEnds(&keyCtx, right) 304 if cmp <= 0 { 305 if ok := iter.next(); !ok { 306 break 307 } 308 } 309 if cmp >= 0 { 310 spanIndex++ 311 } 312 } 313 314 if desc { 315 // After we reverse the buckets below, the last bucket will become the 316 // first bucket. NumRange of the first bucket must be 0, so add an empty 317 // bucket if needed. 318 if iter.next() { 319 // The remaining buckets from the original histogram have been removed. 320 filtered.addEmptyBucket(iter.lb, desc) 321 } else if lastBucket := filtered.buckets[len(filtered.buckets)-1]; lastBucket.NumRange != 0 { 322 iter.setIdx(0) 323 span := makeSpanFromBucket(&iter, prefix) 324 ub := h.getPrevUpperBound(span.EndKey(), span.EndBoundary(), colOffset) 325 filtered.addEmptyBucket(ub, desc) 326 } 327 328 // Reverse the buckets so they are in ascending order. 329 for i := 0; i < len(filtered.buckets)/2; i++ { 330 j := len(filtered.buckets) - 1 - i 331 filtered.buckets[i], filtered.buckets[j] = filtered.buckets[j], filtered.buckets[i] 332 } 333 } 334 335 return filtered 336 } 337 338 func (h *Histogram) getNextLowerBound(currentUpperBound tree.Datum) tree.Datum { 339 nextLowerBound, ok := currentUpperBound.Next(h.evalCtx) 340 if !ok { 341 nextLowerBound = currentUpperBound 342 } 343 return nextLowerBound 344 } 345 346 func (h *Histogram) getPrevUpperBound( 347 currentLowerBound constraint.Key, boundary constraint.SpanBoundary, colOffset int, 348 ) tree.Datum { 349 prevUpperBound := currentLowerBound.Value(colOffset) 350 if boundary == constraint.IncludeBoundary { 351 if prev, ok := prevUpperBound.Prev(h.evalCtx); ok { 352 prevUpperBound = prev 353 } 354 } 355 return prevUpperBound 356 } 357 358 func (h *Histogram) addEmptyBucket(upperBound tree.Datum, desc bool) { 359 h.addBucket(&cat.HistogramBucket{UpperBound: upperBound}, desc) 360 } 361 362 func (h *Histogram) addBucket(bucket *cat.HistogramBucket, desc bool) { 363 // Check whether we can combine this bucket with the previous bucket. 364 if len(h.buckets) != 0 { 365 lastBucket := &h.buckets[len(h.buckets)-1] 366 lower, higher := lastBucket, bucket 367 if desc { 368 lower, higher = bucket, lastBucket 369 } 370 if lower.NumRange == 0 && lower.NumEq == 0 && higher.NumRange == 0 { 371 lastBucket.NumEq = higher.NumEq 372 lastBucket.UpperBound = higher.UpperBound 373 return 374 } 375 if lastBucket.UpperBound.Compare(h.evalCtx, bucket.UpperBound) == 0 { 376 lastBucket.NumEq = lower.NumEq + higher.NumRange + higher.NumEq 377 lastBucket.NumRange = lower.NumRange 378 return 379 } 380 } 381 h.buckets = append(h.buckets, *bucket) 382 } 383 384 // ApplySelectivity reduces the size of each histogram bucket according to 385 // the given selectivity, and returns a new histogram with the results. 386 func (h *Histogram) ApplySelectivity(selectivity float64) *Histogram { 387 res := h.copy() 388 for i := range res.buckets { 389 b := &res.buckets[i] 390 391 // Save n and d for the distinct count formula below. 392 n := b.NumRange 393 d := b.DistinctRange 394 395 b.NumEq *= selectivity 396 b.NumRange *= selectivity 397 398 if d == 0 { 399 continue 400 } 401 // If each distinct value appears n/d times, and the probability of a 402 // row being filtered out is (1 - selectivity), the probability that all 403 // n/d rows are filtered out is (1 - selectivity)^(n/d). So the expected 404 // number of values that are filtered out is d*(1 - selectivity)^(n/d). 405 // 406 // This formula returns d * selectivity when d=n but is closer to d 407 // when d << n. 408 b.DistinctRange = d - d*math.Pow(1-selectivity, n/d) 409 } 410 return res 411 } 412 413 // histogramIter is a helper struct for iterating through the buckets in a 414 // histogram. It enables iterating both forward and backward through the 415 // buckets. 416 type histogramIter struct { 417 h *Histogram 418 desc bool 419 idx int 420 b *cat.HistogramBucket 421 lb tree.Datum 422 ub tree.Datum 423 } 424 425 // init initializes a histogramIter to point to the first bucket of the given 426 // histogram. If desc is true, the iterator starts from the end of the 427 // histogram and moves backwards. 428 func (hi *histogramIter) init(h *Histogram, desc bool) { 429 hi.idx = -1 430 if desc { 431 hi.idx = h.BucketCount() 432 } 433 hi.h = h 434 hi.desc = desc 435 hi.next() 436 } 437 438 // setIdx updates the histogramIter to point to the ith bucket in the 439 // histogram. 440 func (hi *histogramIter) setIdx(i int) { 441 hi.idx = i - 1 442 if hi.desc { 443 hi.idx = i + 1 444 } 445 hi.next() 446 } 447 448 // next sets the histogramIter to point to the next bucket. If hi.desc is true 449 // the "next" bucket is actually the previous bucket in the histogram. Returns 450 // false if there are no more buckets. 451 func (hi *histogramIter) next() (ok bool) { 452 getBounds := func() (lb, ub tree.Datum) { 453 hi.b = hi.h.Bucket(hi.idx) 454 ub = hi.b.UpperBound 455 if hi.idx == 0 { 456 lb = ub 457 } else { 458 lb = hi.h.getNextLowerBound(hi.h.Bucket(hi.idx - 1).UpperBound) 459 } 460 return lb, ub 461 } 462 463 if hi.desc { 464 hi.idx-- 465 if hi.idx < 0 { 466 return false 467 } 468 hi.ub, hi.lb = getBounds() 469 } else { 470 hi.idx++ 471 if hi.idx >= hi.h.BucketCount() { 472 return false 473 } 474 hi.lb, hi.ub = getBounds() 475 } 476 477 return true 478 } 479 480 func makeSpanFromBucket(iter *histogramIter, prefix []tree.Datum) (span constraint.Span) { 481 span.Init( 482 constraint.MakeCompositeKey(append(prefix[:len(prefix):len(prefix)], iter.lb)...), 483 constraint.IncludeBoundary, 484 constraint.MakeCompositeKey(append(prefix[:len(prefix):len(prefix)], iter.ub)...), 485 constraint.IncludeBoundary, 486 ) 487 return span 488 } 489 490 // getFilteredBucket filters the histogram bucket according to the given span, 491 // and returns a new bucket with the results. The span represents the maximum 492 // range of values that remain in the bucket after filtering. The span must 493 // be fully contained within the bucket, or else getFilteredBucket will throw 494 // an error. 495 // 496 // For example, suppose a bucket initially has lower bound 0 (inclusive) and 497 // contains the following data: {NumEq: 5, NumRange: 10, UpperBound: 10} (all 498 // values are integers). 499 // 500 // The following spans will filter the bucket as shown: 501 // [/0 - /5] => {NumEq: 1, NumRange: 5, UpperBound: 5} 502 // [/2 - /10] => {NumEq: 5, NumRange: 8, UpperBound: 10} 503 // [/20 - /30] => error 504 // 505 // Note that the calculations for NumEq and NumRange depend on the data type. 506 // For discrete data types such as integers and dates, it is always possible 507 // to assign a non-zero value for NumEq as long as NumEq and NumRange were 508 // non-zero in the original bucket. For continuous types such as floats, 509 // NumEq will be zero unless the filtered bucket includes the original upper 510 // bound. For example, given the same bucket as in the above example, but with 511 // floating point values instead of integers: 512 // 513 // [/0 - /5] => {NumEq: 0, NumRange: 5, UpperBound: 5.0} 514 // [/2 - /10] => {NumEq: 5, NumRange: 8, UpperBound: 10.0} 515 // [/20 - /30] => error 516 // 517 // For non-numeric types such as strings, it is not possible to estimate 518 // the size of NumRange if the bucket is cut off in the middle. In this case, 519 // we use the heuristic that NumRange is reduced by half. 520 // 521 func getFilteredBucket( 522 iter *histogramIter, keyCtx *constraint.KeyContext, filteredSpan *constraint.Span, colOffset int, 523 ) *cat.HistogramBucket { 524 spanLowerBound := filteredSpan.StartKey().Value(colOffset) 525 spanUpperBound := filteredSpan.EndKey().Value(colOffset) 526 bucketLowerBound := iter.lb 527 bucketUpperBound := iter.ub 528 b := iter.b 529 530 // Check that the given span is contained in the bucket. 531 cmpSpanStartBucketStart := spanLowerBound.Compare(keyCtx.EvalCtx, bucketLowerBound) 532 cmpSpanEndBucketEnd := spanUpperBound.Compare(keyCtx.EvalCtx, bucketUpperBound) 533 contained := cmpSpanStartBucketStart >= 0 && cmpSpanEndBucketEnd <= 0 534 if iter.desc { 535 contained = cmpSpanStartBucketStart <= 0 && cmpSpanEndBucketEnd >= 0 536 } 537 if !contained { 538 panic(errors.AssertionFailedf("span must be fully contained in the bucket")) 539 } 540 541 // Extract the range sizes before and after filtering. Only numeric and 542 // date-time types will have ok=true, since these are the only types for 543 // which we can accurately calculate the range size of a non-equality span. 544 rangeBefore, rangeAfter, ok := getRangesBeforeAndAfter( 545 bucketLowerBound, bucketUpperBound, spanLowerBound, spanUpperBound, iter.desc, 546 ) 547 548 // Determine whether this span represents an equality condition. 549 isEqualityCondition := spanLowerBound.Compare(keyCtx.EvalCtx, spanUpperBound) == 0 550 551 // Determine whether this span includes the original upper bound of the 552 // bucket. 553 isSpanEndBoundaryInclusive := filteredSpan.EndBoundary() == constraint.IncludeBoundary 554 includesOriginalUpperBound := isSpanEndBoundaryInclusive && cmpSpanEndBucketEnd == 0 555 if iter.desc { 556 isSpanStartBoundaryInclusive := filteredSpan.StartBoundary() == constraint.IncludeBoundary 557 includesOriginalUpperBound = isSpanStartBoundaryInclusive && cmpSpanStartBucketStart == 0 558 } 559 560 // Calculate the new value for numEq. 561 var numEq float64 562 if includesOriginalUpperBound { 563 numEq = b.NumEq 564 } else { 565 if isEqualityCondition { 566 // This span represents an equality condition with a value in the range 567 // of this bucket. Use the distinct count of the bucket to estimate the 568 // selectivity of the equality condition. 569 selectivity := 1.0 570 if b.DistinctRange > 1 { 571 selectivity = 1 / b.DistinctRange 572 } 573 numEq = selectivity * b.NumRange 574 } else if ok && rangeBefore > 0 && isDiscrete(bucketLowerBound.ResolvedType()) { 575 // If we were successful in finding the ranges before and after filtering 576 // and the data type is discrete (e.g., integer, date, or timestamp), we 577 // can assign some of the old NumRange to the new NumEq. 578 numEq = b.NumRange / rangeBefore 579 } 580 } 581 582 // Calculate the new value for numRange. 583 var numRange float64 584 if isEqualityCondition { 585 numRange = 0 586 } else if ok && rangeBefore > 0 { 587 // If we were successful in finding the ranges before and after filtering, 588 // calculate the fraction of values that should be assigned to the new 589 // bucket. 590 numRange = b.NumRange * rangeAfter / rangeBefore 591 } else { 592 // In the absence of any information, assume we reduced the size of the 593 // bucket by half. 594 numRange = 0.5 * b.NumRange 595 } 596 597 // Calculate the new value for distinctCountRange. 598 var distinctCountRange float64 599 if b.NumRange > 0 { 600 distinctCountRange = b.DistinctRange * numRange / b.NumRange 601 } 602 603 upperBound := spanUpperBound 604 if iter.desc { 605 upperBound = spanLowerBound 606 } 607 return &cat.HistogramBucket{ 608 NumEq: numEq, 609 NumRange: numRange, 610 DistinctRange: distinctCountRange, 611 UpperBound: upperBound, 612 } 613 } 614 615 // getRangesBeforeAndAfter returns the size of the ranges before and after the 616 // given bucket is filtered by the given span. If swap is true, the upper and 617 // lower bounds should be swapped for the bucket and the span. Returns ok=true 618 // if these range sizes are calculated successfully, and false otherwise. 619 func getRangesBeforeAndAfter( 620 bucketLowerBound, bucketUpperBound, spanLowerBound, spanUpperBound tree.Datum, swap bool, 621 ) (rangeBefore, rangeAfter float64, ok bool) { 622 // If the data types don't match, don't bother trying to calculate the range 623 // sizes. This should almost never happen, but we want to avoid type 624 // assertion errors below. 625 typesMatch := 626 bucketLowerBound.ResolvedType().Equivalent(bucketUpperBound.ResolvedType()) && 627 bucketUpperBound.ResolvedType().Equivalent(spanLowerBound.ResolvedType()) && 628 spanLowerBound.ResolvedType().Equivalent(spanUpperBound.ResolvedType()) 629 if !typesMatch { 630 return 0, 0, false 631 } 632 633 if swap { 634 bucketLowerBound, bucketUpperBound = bucketUpperBound, bucketLowerBound 635 spanLowerBound, spanUpperBound = spanUpperBound, spanLowerBound 636 } 637 638 // TODO(rytaft): handle more types here. 639 // Note: the calculations below assume that bucketLowerBound is inclusive and 640 // Span.PreferInclusive() has been called on the span. 641 642 getRange := func(lowerBound, upperBound tree.Datum) (rng float64, ok bool) { 643 switch lowerBound.ResolvedType().Family() { 644 case types.IntFamily: 645 rng = float64(*upperBound.(*tree.DInt)) - float64(*lowerBound.(*tree.DInt)) 646 return rng, true 647 648 case types.DateFamily: 649 lower := lowerBound.(*tree.DDate) 650 upper := upperBound.(*tree.DDate) 651 if lower.IsFinite() && upper.IsFinite() { 652 rng = float64(upper.PGEpochDays()) - float64(lower.PGEpochDays()) 653 return rng, true 654 } 655 return 0, false 656 657 case types.DecimalFamily: 658 lower, err := lowerBound.(*tree.DDecimal).Float64() 659 if err != nil { 660 return 0, false 661 } 662 upper, err := upperBound.(*tree.DDecimal).Float64() 663 if err != nil { 664 return 0, false 665 } 666 rng = upper - lower 667 return rng, true 668 669 case types.FloatFamily: 670 rng = float64(*upperBound.(*tree.DFloat)) - float64(*lowerBound.(*tree.DFloat)) 671 return rng, true 672 673 case types.TimestampFamily: 674 lower := lowerBound.(*tree.DTimestamp).Time 675 upper := upperBound.(*tree.DTimestamp).Time 676 rng = float64(upper.Sub(lower)) 677 return rng, true 678 679 case types.TimestampTZFamily: 680 lower := lowerBound.(*tree.DTimestampTZ).Time 681 upper := upperBound.(*tree.DTimestampTZ).Time 682 rng = float64(upper.Sub(lower)) 683 return rng, true 684 685 default: 686 return 0, false 687 } 688 } 689 690 rangeBefore, okBefore := getRange(bucketLowerBound, bucketUpperBound) 691 rangeAfter, okAfter := getRange(spanLowerBound, spanUpperBound) 692 ok = okBefore && okAfter 693 694 return rangeBefore, rangeAfter, ok 695 } 696 697 // isDiscrete returns true if the given data type is discrete. 698 func isDiscrete(typ *types.T) bool { 699 switch typ.Family() { 700 case types.IntFamily, types.DateFamily, types.TimestampFamily, types.TimestampTZFamily: 701 return true 702 } 703 return false 704 } 705 706 // histogramWriter prints histograms with the following formatting: 707 // NumRange1 NumEq1 NumRange2 NumEq2 .... 708 // <----------- UpperBound1 ----------- UpperBound2 .... 709 // 710 // For example: 711 // 0 1 90 10 0 20 712 // <--- 0 ---- 100 --- 200 713 // 714 // This describes a histogram with 3 buckets. The first bucket contains 1 value 715 // equal to 0. The second bucket contains 90 values between 0 and 100 and 716 // 10 values equal to 100. Finally, the third bucket contains 20 values equal 717 // to 200. 718 type histogramWriter struct { 719 cells [][]string 720 colWidths []int 721 } 722 723 const ( 724 // These constants describe the two rows that are printed. 725 counts = iota 726 boundaries 727 ) 728 729 func (w *histogramWriter) init(buckets []cat.HistogramBucket) { 730 w.cells = [][]string{ 731 make([]string, len(buckets)*2), 732 make([]string, len(buckets)*2), 733 } 734 w.colWidths = make([]int, len(buckets)*2) 735 736 for i, b := range buckets { 737 w.cells[counts][i*2] = fmt.Sprintf(" %.5g ", b.NumRange) 738 w.cells[counts][i*2+1] = fmt.Sprintf("%.5g", b.NumEq) 739 // TODO(rytaft): truncate large strings. 740 w.cells[boundaries][i*2+1] = fmt.Sprintf(" %s ", b.UpperBound.String()) 741 if width := tablewriter.DisplayWidth(w.cells[counts][i*2]); width > w.colWidths[i*2] { 742 w.colWidths[i*2] = width 743 } 744 if width := tablewriter.DisplayWidth(w.cells[counts][i*2+1]); width > w.colWidths[i*2+1] { 745 w.colWidths[i*2+1] = width 746 } 747 if width := tablewriter.DisplayWidth(w.cells[boundaries][i*2+1]); width > w.colWidths[i*2+1] { 748 w.colWidths[i*2+1] = width 749 } 750 } 751 } 752 753 func (w *histogramWriter) write(out io.Writer) { 754 if len(w.cells[counts]) == 0 { 755 return 756 } 757 758 // Print a space to match up with the "<" character below. 759 fmt.Fprint(out, " ") 760 for i := range w.cells[counts] { 761 fmt.Fprintf(out, "%s", tablewriter.Pad(w.cells[counts][i], " ", w.colWidths[i])) 762 } 763 fmt.Fprint(out, "\n") 764 fmt.Fprint(out, "<") 765 for i := range w.cells[boundaries] { 766 fmt.Fprintf(out, "%s", tablewriter.Pad(w.cells[boundaries][i], "-", w.colWidths[i])) 767 } 768 }