github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/stats/row_sampling.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package stats

import (
	"container/heap"
	"context"

	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
)

// SampledRow is a sampled row, along with the random rank that was assigned
// to it.
type SampledRow struct {
	Row  sqlbase.EncDatumRow
	Rank uint64
}

// SampleReservoir implements reservoir sampling using random sort. Each
// row is assigned a rank (which should be a uniformly generated random value),
// and rows with the smallest K ranks are retained.
//
// This is implemented as a max-heap of the smallest K ranks; a new row can
// replace the row with the maximum rank. Note that heap operations only happen
// when we actually encounter a row that is among the top K so far; the
// probability of this is K/n when n rows have been seen, so the expected
// number of heap operations for a stream of size N is the sum of K/n for n
// from K+1 to N, which is O(K log(N/K)). Each heap operation costs O(log K),
// so the overall expected running time for a stream of size N is
// O(N + K log(N/K) log K).
//
// The same structure can be used to combine sample sets (as long as the
// original ranks are preserved) for distributed reservoir sampling. The
// requirement is that the capacity of each distributed reservoir must have been
// at least as large as this reservoir.
type SampleReservoir struct {
	samples  []SampledRow
	colTypes []*types.T
	da       sqlbase.DatumAlloc
	ra       sqlbase.EncDatumRowAlloc
	memAcc   *mon.BoundAccount

	// sampleCols contains the ordinals of columns that should be sampled from
	// each row. Note that the sampled rows still contain all columns, but
	// any columns not part of this set are given a null value.
	sampleCols util.FastIntSet
}

var _ heap.Interface = &SampleReservoir{}

// Init initializes a SampleReservoir.
func (sr *SampleReservoir) Init(
	numSamples int, colTypes []*types.T, memAcc *mon.BoundAccount, sampleCols util.FastIntSet,
) {
	sr.samples = make([]SampledRow, 0, numSamples)
	sr.colTypes = colTypes
	sr.memAcc = memAcc
	sr.sampleCols = sampleCols
}
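
// The following is an illustrative sketch, not part of the original file: it
// shows the intended calling pattern for SampleReservoir. Each row in the
// stream is given a uniformly random rank (nextRank would typically be a
// PRNG's Uint64 method; both it and the rows slice are hypothetical inputs).
// After the loop, Get returns the K rows with the smallest ranks, which form
// a uniform random sample of the stream.
func sampleStreamSketch(
	ctx context.Context,
	evalCtx *tree.EvalContext,
	sr *SampleReservoir,
	rows []sqlbase.EncDatumRow,
	nextRank func() uint64,
) error {
	for _, row := range rows {
		// SampleRow either admits the row (copying it) or drops it.
		if err := sr.SampleRow(ctx, evalCtx, row, nextRank()); err != nil {
			return err
		}
	}
	return nil
}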

// Disable releases the memory of this SampleReservoir and sets its capacity
// to zero.
func (sr *SampleReservoir) Disable() {
	sr.samples = nil
}

// Len is part of heap.Interface.
func (sr *SampleReservoir) Len() int {
	return len(sr.samples)
}

// Less is part of heap.Interface.
func (sr *SampleReservoir) Less(i, j int) bool {
	// We want a max heap, so higher ranks sort first.
	return sr.samples[i].Rank > sr.samples[j].Rank
}

// Swap is part of heap.Interface.
func (sr *SampleReservoir) Swap(i, j int) {
	sr.samples[i], sr.samples[j] = sr.samples[j], sr.samples[i]
}

// Push is part of heap.Interface, but we're not using it.
func (sr *SampleReservoir) Push(x interface{}) { panic("unimplemented") }

// Pop is part of heap.Interface, but we're not using it.
func (sr *SampleReservoir) Pop() interface{} { panic("unimplemented") }
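
// Sketch of the admission rule implied by the max-heap (a hypothetical
// helper, not in the original file): the heap root sr.samples[0] always
// carries the largest rank in the reservoir, so once the reservoir is full a
// new row is admitted only if its rank is strictly smaller than the root's.
func wouldAdmitSketch(sr *SampleReservoir, rank uint64) bool {
	return len(sr.samples) < cap(sr.samples) ||
		(len(sr.samples) > 0 && rank < sr.samples[0].Rank)
}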

// SampleRow looks at a row and either drops it or adds it to the reservoir.
func (sr *SampleReservoir) SampleRow(
	ctx context.Context, evalCtx *tree.EvalContext, row sqlbase.EncDatumRow, rank uint64,
) error {
	if len(sr.samples) < cap(sr.samples) {
		// We haven't accumulated enough rows yet; just append.
		rowCopy := sr.ra.AllocRow(len(row))

		// Perform memory accounting for the allocated EncDatumRow. The
		// additional memory used by the copied datums is accounted for inside
		// copyRow.
		if sr.memAcc != nil {
			if err := sr.memAcc.Grow(ctx, int64(len(rowCopy))*int64(rowCopy[0].Size())); err != nil {
				return err
			}
		}
		if err := sr.copyRow(ctx, evalCtx, rowCopy, row); err != nil {
			return err
		}
		sr.samples = append(sr.samples, SampledRow{Row: rowCopy, Rank: rank})
		if len(sr.samples) == cap(sr.samples) {
			// We just reached the limit; initialize the heap.
			heap.Init(sr)
		}
		return nil
	}
	// Replace the row with the maximum rank if our rank is smaller.
	if len(sr.samples) > 0 && rank < sr.samples[0].Rank {
		if err := sr.copyRow(ctx, evalCtx, sr.samples[0].Row, row); err != nil {
			return err
		}
		sr.samples[0].Rank = rank
		heap.Fix(sr, 0)
	}
	return nil
}

// Get returns the sampled rows.
func (sr *SampleReservoir) Get() []SampledRow {
	return sr.samples
}
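
// Illustrative sketch (not part of the original file) of the distributed
// merge described in the SampleReservoir comment: because every SampledRow
// retains its original rank, re-sampling the union of per-node sample sets
// into a reservoir whose capacity is at most each per-node capacity selects
// exactly the rows with the globally smallest ranks, as if the combined
// stream had been sampled directly. The partial slices are hypothetical.
func mergeSamplesSketch(
	ctx context.Context,
	evalCtx *tree.EvalContext,
	dst *SampleReservoir,
	partial [][]SampledRow,
) error {
	for _, samples := range partial {
		for _, s := range samples {
			// Reuse the preserved rank rather than generating a new one.
			if err := dst.SampleRow(ctx, evalCtx, s.Row, s.Rank); err != nil {
				return err
			}
		}
	}
	return nil
}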

func (sr *SampleReservoir) copyRow(
	ctx context.Context, evalCtx *tree.EvalContext, dst, src sqlbase.EncDatumRow,
) error {
	for i := range src {
		if !sr.sampleCols.Contains(i) {
			dst[i].Datum = tree.DNull
			continue
		}
		// Copy only the decoded datum to ensure that we remove any reference to
		// the encoded bytes. The encoded bytes would have been scanned in a batch
		// of ~10000 rows, so we must drop the reference to allow the garbage
		// collector to release the memory from the batch.
		if err := src[i].EnsureDecoded(sr.colTypes[i], &sr.da); err != nil {
			return err
		}
		beforeSize := dst[i].Size()
		dst[i] = sqlbase.DatumToEncDatum(sr.colTypes[i], src[i].Datum)
		afterSize := dst[i].Size()

		// If the datum is too large, truncate it (truncation also makes a copy).
		// Otherwise, deep-copy it if it might still reference the kv batch.
		if afterSize > uintptr(maxBytesPerSample) {
			dst[i].Datum = truncateDatum(evalCtx, dst[i].Datum, maxBytesPerSample)
			afterSize = dst[i].Size()
		} else {
			if enc, ok := src[i].Encoding(); ok && enc != sqlbase.DatumEncoding_VALUE {
				// Only datums that were key-encoded might reference the kv batch.
				dst[i].Datum = deepCopyDatum(evalCtx, dst[i].Datum)
			}
		}

		// Perform memory accounting.
		if sr.memAcc != nil && afterSize > beforeSize {
			if err := sr.memAcc.Grow(ctx, int64(afterSize-beforeSize)); err != nil {
				return err
			}
		}
	}
	return nil
}

const maxBytesPerSample = 400

// truncateDatum truncates large datums to avoid using excessive memory or disk
// space. It performs a best-effort attempt to return a datum that is similar
// to d using at most maxBytes bytes.
//
// For example, if maxBytes=10, "Cockroach Labs" would be truncated to
// "Cockroach ".
func truncateDatum(evalCtx *tree.EvalContext, d tree.Datum, maxBytes int) tree.Datum {
	switch t := d.(type) {
	case *tree.DBitArray:
		b := tree.DBitArray{BitArray: t.ToWidth(uint(maxBytes * 8))}
		return &b

	case *tree.DBytes:
		// Make a copy so the memory from the original byte string can be garbage
		// collected. Don't copy more bytes than the original has, so that short
		// values are not zero-padded.
		n := maxBytes
		if len(*t) < n {
			n = len(*t)
		}
		b := make([]byte, n)
		copy(b, *t)
		return tree.NewDBytes(tree.DBytes(b))

	case *tree.DString:
		return tree.NewDString(truncateString(string(*t), maxBytes))

	case *tree.DCollatedString:
		contents := truncateString(t.Contents, maxBytes)

		// Note: the result will end up being larger than maxBytes due to the key
		// and locale, so this is just a best-effort attempt to limit the size.
		res, err := tree.NewDCollatedString(contents, t.Locale, &evalCtx.CollationEnv)
		if err != nil {
			return d
		}
		return res

	case *tree.DOidWrapper:
		return &tree.DOidWrapper{
			Wrapped: truncateDatum(evalCtx, t.Wrapped, maxBytes),
			Oid:     t.Oid,
		}

	default:
		// It's not easy to truncate other types (e.g. Decimal).
		return d
	}
}
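
// Sketch (a hypothetical helper, not in the original file) showing the
// example from the truncateDatum comment: with maxBytes=10, the string datum
// "Cockroach Labs" is truncated to "Cockroach ", while types with no cheap
// truncation (e.g. Decimal) are returned unchanged.
func truncateDatumSketch(evalCtx *tree.EvalContext) tree.Datum {
	d := tree.NewDString("Cockroach Labs")
	return truncateDatum(evalCtx, d, 10 /* maxBytes */) // "Cockroach "
}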

// truncateString truncates long strings to the longest prefix that ends on a
// rune boundary and contains at most maxBytes bytes. It is rune-aware, so it
// does not cut unicode characters in half.
func truncateString(s string, maxBytes int) string {
	last := len(s)
	if len(s) > maxBytes {
		last = 0
		// For strings, range skips from rune to rune and i is the byte index of
		// the current rune, so s[:i] always ends on a rune boundary.
		for i := range s {
			if i > maxBytes {
				break
			}
			last = i
		}
	}

	// Copy the truncated string so that the memory from the longer string can
	// be garbage collected.
	b := make([]byte, last)
	copy(b, s)
	return string(b)
}
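
// Illustrative sketch (hypothetical, not in the original file) of the rune
// awareness: "héllo" is encoded as h (1 byte), é (2 bytes), l, l, o, so the
// rune boundaries fall at byte offsets 0, 1, 3, 4, 5, and 6. Truncating to 2
// bytes keeps only "h", since including 'é' would require 3 bytes.
func truncateStringSketch() string {
	return truncateString("héllo", 2) // "h"
}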

// deepCopyDatum performs a deep copy for datums such as DString to remove any
// references to the kv batch and allow the batch to be garbage collected.
// Note: this function is currently only called for key-encoded datums. Update
// the calling function if there is a need to call this for value-encoded
// datums as well.
func deepCopyDatum(evalCtx *tree.EvalContext, d tree.Datum) tree.Datum {
	switch t := d.(type) {
	case *tree.DString:
		return tree.NewDString(deepCopyString(string(*t)))

	case *tree.DCollatedString:
		return &tree.DCollatedString{
			Contents: deepCopyString(t.Contents),
			Locale:   t.Locale,
			Key:      t.Key,
		}

	case *tree.DOidWrapper:
		return &tree.DOidWrapper{
			Wrapped: deepCopyDatum(evalCtx, t.Wrapped),
			Oid:     t.Oid,
		}

	default:
		// We do not collect stats on JSON, and other types do not require a deep
		// copy (or they are already copied during decoding).
		return d
	}
}
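
// Sketch (an assumption, not part of the original file) mirroring the check
// in copyRow: a decoded EncDatum is detached from the kv batch only when it
// was key-encoded, since per the comments above only key-encoded datums may
// still reference the batch. Assumes ed has already been decoded.
func detachFromBatchSketch(evalCtx *tree.EvalContext, ed sqlbase.EncDatum) tree.Datum {
	if enc, ok := ed.Encoding(); ok && enc != sqlbase.DatumEncoding_VALUE {
		return deepCopyDatum(evalCtx, ed.Datum)
	}
	return ed.Datum
}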

// deepCopyString performs a deep copy of a string.
func deepCopyString(s string) string {
	b := make([]byte, len(s))
	copy(b, s)
	return string(b)
}