github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/stats/row_sampling.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package stats

import (
	"container/heap"
	"context"

	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
)

// SampledRow is a row that was sampled.
type SampledRow struct {
	Row  sqlbase.EncDatumRow
	Rank uint64
}

// SampleReservoir implements reservoir sampling using random sort. Each
// row is assigned a rank (which should be a uniformly generated random value),
// and rows with the smallest K ranks are retained.
//
// This is implemented as a max-heap of the smallest K ranks; each row can
// replace the row with the maximum rank. Note that heap operations only happen
// when we actually encounter a row that is among the top K so far; the
// probability of this is K/N if there were N rows so far; for large streams,
// we would have O(K log N) heap operations in expectation. The overall running
// time for a stream of size N is O(N + K (log N)(log K)).
//
// The same structure can be used to combine sample sets (as long as the
// original ranks are preserved) for distributed reservoir sampling. The
// requirement is that the capacity of each distributed reservoir must have
// been at least as large as this reservoir.
type SampleReservoir struct {
	samples  []SampledRow
	colTypes []*types.T
	da       sqlbase.DatumAlloc
	ra       sqlbase.EncDatumRowAlloc
	memAcc   *mon.BoundAccount

	// sampleCols contains the ordinals of columns that should be sampled from
	// each row. Note that the sampled rows still contain all columns, but any
	// columns not part of this set are given a null value.
	sampleCols util.FastIntSet
}

var _ heap.Interface = &SampleReservoir{}

// Init initializes a SampleReservoir.
func (sr *SampleReservoir) Init(
	numSamples int, colTypes []*types.T, memAcc *mon.BoundAccount, sampleCols util.FastIntSet,
) {
	sr.samples = make([]SampledRow, 0, numSamples)
	sr.colTypes = colTypes
	sr.memAcc = memAcc
	sr.sampleCols = sampleCols
}

// Disable releases the memory of this SampleReservoir and sets its capacity
// to zero.
func (sr *SampleReservoir) Disable() {
	sr.samples = nil
}

// Len is part of heap.Interface.
func (sr *SampleReservoir) Len() int {
	return len(sr.samples)
}

// Less is part of heap.Interface.
func (sr *SampleReservoir) Less(i, j int) bool {
	// We want a max heap, so higher ranks sort first.
	return sr.samples[i].Rank > sr.samples[j].Rank
}

// Swap is part of heap.Interface.
func (sr *SampleReservoir) Swap(i, j int) {
	sr.samples[i], sr.samples[j] = sr.samples[j], sr.samples[i]
}

// Push is part of heap.Interface, but we're not using it.
func (sr *SampleReservoir) Push(x interface{}) { panic("unimplemented") }

// Pop is part of heap.Interface, but we're not using it.
func (sr *SampleReservoir) Pop() interface{} { panic("unimplemented") }
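// The following sketch is editorial and illustrative only; it is not part of
// the original file, and the names maxRankHeap and reservoirSketch are
// hypothetical. It restates the rank-based scheme documented on
// SampleReservoir using plain uint64 ranks: keep the K smallest ranks in a
// max-heap, replacing the current maximum whenever a smaller rank arrives.

// maxRankHeap is a minimal max-heap of ranks (highest rank at the root).
type maxRankHeap []uint64

func (h maxRankHeap) Len() int            { return len(h) }
func (h maxRankHeap) Less(i, j int) bool  { return h[i] > h[j] }
func (h maxRankHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *maxRankHeap) Push(x interface{}) { *h = append(*h, x.(uint64)) }
func (h *maxRankHeap) Pop() interface{} {
	old := *h
	x := old[len(old)-1]
	*h = old[:len(old)-1]
	return x
}

// reservoirSketch returns the k smallest ranks seen in the stream, mirroring
// the control flow of SampleReservoir.SampleRow below: append until the
// reservoir is full, initialize the heap exactly once, then replace the
// maximum element whenever a smaller rank shows up.
func reservoirSketch(ranks []uint64, k int) []uint64 {
	h := make(maxRankHeap, 0, k)
	for _, r := range ranks {
		if len(h) < k {
			h = append(h, r)
			if len(h) == k {
				heap.Init(&h)
			}
			continue
		}
		if r < h[0] {
			h[0] = r
			heap.Fix(&h, 0)
		}
	}
	return h
}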
// SampleRow looks at a row and either drops it or adds it to the reservoir.
func (sr *SampleReservoir) SampleRow(
	ctx context.Context, evalCtx *tree.EvalContext, row sqlbase.EncDatumRow, rank uint64,
) error {
	if len(sr.samples) < cap(sr.samples) {
		// We haven't accumulated enough rows yet; just append.
		rowCopy := sr.ra.AllocRow(len(row))

		// Perform memory accounting for the allocated EncDatumRow. We will
		// account for the additional memory used after copying inside copyRow.
		if sr.memAcc != nil {
			if err := sr.memAcc.Grow(ctx, int64(len(rowCopy))*int64(rowCopy[0].Size())); err != nil {
				return err
			}
		}
		if err := sr.copyRow(ctx, evalCtx, rowCopy, row); err != nil {
			return err
		}
		sr.samples = append(sr.samples, SampledRow{Row: rowCopy, Rank: rank})
		if len(sr.samples) == cap(sr.samples) {
			// We just reached the limit; initialize the heap.
			heap.Init(sr)
		}
		return nil
	}
	// Replace the max rank if ours is smaller.
	if len(sr.samples) > 0 && rank < sr.samples[0].Rank {
		if err := sr.copyRow(ctx, evalCtx, sr.samples[0].Row, row); err != nil {
			return err
		}
		sr.samples[0].Rank = rank
		heap.Fix(sr, 0)
	}
	return nil
}

// Get returns the sampled rows.
func (sr *SampleReservoir) Get() []SampledRow {
	return sr.samples
}

func (sr *SampleReservoir) copyRow(
	ctx context.Context, evalCtx *tree.EvalContext, dst, src sqlbase.EncDatumRow,
) error {
	for i := range src {
		if !sr.sampleCols.Contains(i) {
			dst[i].Datum = tree.DNull
			continue
		}
		// Copy only the decoded datum to ensure that we remove any reference
		// to the encoded bytes. The encoded bytes would have been scanned in a
		// batch of ~10000 rows, so we must delete the reference to allow the
		// garbage collector to release the memory from the batch.
		if err := src[i].EnsureDecoded(sr.colTypes[i], &sr.da); err != nil {
			return err
		}
		beforeSize := dst[i].Size()
		dst[i] = sqlbase.DatumToEncDatum(sr.colTypes[i], src[i].Datum)
		afterSize := dst[i].Size()

		// If the datum is too large, truncate it (this also performs a copy).
		// Otherwise, just perform a copy.
		if afterSize > uintptr(maxBytesPerSample) {
			dst[i].Datum = truncateDatum(evalCtx, dst[i].Datum, maxBytesPerSample)
			afterSize = dst[i].Size()
		} else {
			if enc, ok := src[i].Encoding(); ok && enc != sqlbase.DatumEncoding_VALUE {
				// Only datums that were key-encoded might reference the kv batch.
				dst[i].Datum = deepCopyDatum(evalCtx, dst[i].Datum)
			}
		}

		// Perform memory accounting.
		if sr.memAcc != nil && afterSize > beforeSize {
			if err := sr.memAcc.Grow(ctx, int64(afterSize-beforeSize)); err != nil {
				return err
			}
		}
	}
	return nil
}

const maxBytesPerSample = 400
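// The following helper is editorial and illustrative only; it is not part of
// the original file, and the name mergeSketch is hypothetical. It shows how
// the distributed combination described in the SampleReservoir comment could
// look: because SampleRow accepts an externally supplied rank, re-offering
// each remote sample with its original rank should make the merged reservoir
// equivalent to one built over the combined stream (assuming each remote
// reservoir's capacity was at least as large as dst's).
func mergeSketch(
	ctx context.Context, evalCtx *tree.EvalContext, dst *SampleReservoir, remote []SampledRow,
) error {
	for i := range remote {
		// Preserving remote[i].Rank is what keeps the merged sample uniform.
		if err := dst.SampleRow(ctx, evalCtx, remote[i].Row, remote[i].Rank); err != nil {
			return err
		}
	}
	return nil
}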
// truncateDatum truncates large datums to avoid using excessive memory or disk
// space. It performs a best-effort attempt to return a datum that is similar
// to d using at most maxBytes bytes.
//
// For example, if maxBytes=10, "Cockroach Labs" would be truncated to
// "Cockroach ".
func truncateDatum(evalCtx *tree.EvalContext, d tree.Datum, maxBytes int) tree.Datum {
	switch t := d.(type) {
	case *tree.DBitArray:
		b := tree.DBitArray{BitArray: t.ToWidth(uint(maxBytes * 8))}
		return &b

	case *tree.DBytes:
		// Make a copy so the memory from the original byte string can be
		// garbage collected. Don't copy more bytes than are actually present,
		// so a short value is not zero-padded out to maxBytes.
		n := maxBytes
		if len(*t) < n {
			n = len(*t)
		}
		b := make([]byte, n)
		copy(b, *t)
		return tree.NewDBytes(tree.DBytes(b))

	case *tree.DString:
		return tree.NewDString(truncateString(string(*t), maxBytes))

	case *tree.DCollatedString:
		contents := truncateString(t.Contents, maxBytes)

		// Note: this will end up being larger than maxBytes due to the key and
		// locale, so this is just a best-effort attempt to limit the size.
		res, err := tree.NewDCollatedString(contents, t.Locale, &evalCtx.CollationEnv)
		if err != nil {
			return d
		}
		return res

	case *tree.DOidWrapper:
		return &tree.DOidWrapper{
			Wrapped: truncateDatum(evalCtx, t.Wrapped, maxBytes),
			Oid:     t.Oid,
		}

	default:
		// It's not easy to truncate other types (e.g. Decimal).
		return d
	}
}

// truncateString truncates long strings to the longest prefix that ends on a
// rune boundary and is at most maxBytes bytes. It is rune-aware, so it does
// not cut Unicode characters in half.
func truncateString(s string, maxBytes int) string {
	last := 0
	// For strings, range skips from rune to rune and i is the byte index of
	// the current rune.
	for i := range s {
		if i > maxBytes {
			break
		}
		last = i
	}

	// Copy the truncated string so that the memory from the longer string can
	// be garbage collected.
	b := make([]byte, last)
	copy(b, s)
	return string(b)
}

// deepCopyDatum performs a deep copy for datums such as DString to remove any
// references to the kv batch and allow the batch to be garbage collected.
// Note: this function is currently only called for key-encoded datums. Update
// the calling function if there is a need to call this for value-encoded
// datums as well.
func deepCopyDatum(evalCtx *tree.EvalContext, d tree.Datum) tree.Datum {
	switch t := d.(type) {
	case *tree.DString:
		return tree.NewDString(deepCopyString(string(*t)))

	case *tree.DCollatedString:
		return &tree.DCollatedString{
			Contents: deepCopyString(t.Contents),
			Locale:   t.Locale,
			Key:      t.Key,
		}

	case *tree.DOidWrapper:
		return &tree.DOidWrapper{
			Wrapped: deepCopyDatum(evalCtx, t.Wrapped),
			Oid:     t.Oid,
		}

	default:
		// We do not collect stats on JSON, and other types do not require a
		// deep copy (or they are already copied during decoding).
		return d
	}
}

// deepCopyString performs a deep copy of a string.
func deepCopyString(s string) string {
	b := make([]byte, len(s))
	copy(b, s)
	return string(b)
}
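// The following example function is editorial and illustrative only; it is
// not part of the original file. It demonstrates the rune-aware behavior of
// truncateString on a string containing a multi-byte character: the result is
// always cut at a rune boundary at or before maxBytes.
func exampleTruncateString() {
	// "naïve" is 6 bytes long; 'ï' occupies bytes 2-3.
	_ = truncateString("naïve", 3) // "na": including 'ï' would need 4 bytes.
	_ = truncateString("naïve", 4) // "naï": a rune boundary falls exactly at byte 4.
}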