github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sample_aggregator_test.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowexec

import (
	"context"
	gosql "database/sql"
	"reflect"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
	"github.com/cockroachdb/cockroach/pkg/sql/stats"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/testutils/distsqlutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

func TestSampleAggregator(t *testing.T) {
	defer leaktest.AfterTest(t)()

	server, sqlDB, kvDB := serverutils.StartServer(t, base.TestServerArgs{})
	defer server.Stopper().Stop(context.Background())

	st := cluster.MakeTestingClusterSettings()
	evalCtx := tree.MakeTestingEvalContext(st)
	defer evalCtx.Stop(context.Background())

	runTest := func(memLimitBytes int64, expectOutOfMemory bool) {
		flowCtx := execinfra.FlowCtx{
			EvalCtx: &evalCtx,
			Cfg: &execinfra.ServerConfig{
				Settings: st,
				DB:       kvDB,
				Executor: server.InternalExecutor().(sqlutil.InternalExecutor),
				Gossip:   gossip.MakeExposedGossip(server.GossipI().(*gossip.Gossip)),
			},
		}
		// Override the default memory limit. If memLimitBytes is small but
		// non-zero, the processor will hit this limit and disable sampling.
		flowCtx.Cfg.TestingKnobs.MemoryLimitBytes = memLimitBytes

		// Note: -1 values represent NULL; the expected nullCounts below rely
		// on this.
		inputRows := [][]int{
			{-1, 1},
			{1, 1},
			{2, 2},
			{1, 3},
			{2, 4},
			{1, 5},
			{2, 6},
			{1, 7},
			{2, 8},
			{-1, 3},
			{1, -1},
		}

		// We randomly distribute the input rows between multiple Samplers and
		// aggregate the results.
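		// This mimics a distributed stats collection, where each node samples
		// its local rows and a single aggregator merges the sampled rows and
		// sketches.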
		numSamplers := 3

		samplerOutTypes := []*types.T{
			types.Int,   // original column
			types.Int,   // original column
			types.Int,   // rank
			types.Int,   // sketch index
			types.Int,   // num rows
			types.Int,   // null vals
			types.Bytes, // sketch data
		}

		sketchSpecs := []execinfrapb.SketchSpec{
			{
				SketchType:        execinfrapb.SketchType_HLL_PLUS_PLUS_V1,
				Columns:           []uint32{0},
				GenerateHistogram: false,
				StatName:          "a",
			},
			{
				SketchType:          execinfrapb.SketchType_HLL_PLUS_PLUS_V1,
				Columns:             []uint32{1},
				GenerateHistogram:   true,
				HistogramMaxBuckets: 4,
			},
		}

		rng, _ := randutil.NewPseudoRand()
		rowPartitions := make([][][]int, numSamplers)
		for _, row := range inputRows {
			j := rng.Intn(numSamplers)
			rowPartitions[j] = append(rowPartitions[j], row)
		}

		outputs := make([]*distsqlutils.RowBuffer, numSamplers)
		for i := 0; i < numSamplers; i++ {
			rows := sqlbase.GenEncDatumRowsInt(rowPartitions[i])
			in := distsqlutils.NewRowBuffer(sqlbase.TwoIntCols, rows, distsqlutils.RowBufferArgs{})
			outputs[i] = distsqlutils.NewRowBuffer(samplerOutTypes, nil /* rows */, distsqlutils.RowBufferArgs{})

			spec := &execinfrapb.SamplerSpec{SampleSize: 100, Sketches: sketchSpecs}
			p, err := newSamplerProcessor(
				&flowCtx, 0 /* processorID */, spec, in, &execinfrapb.PostProcessSpec{}, outputs[i],
			)
			if err != nil {
				t.Fatal(err)
			}
			p.Run(context.Background())
		}

		// Randomly interleave the output rows from the samplers into a single
		// buffer.
		samplerResults := distsqlutils.NewRowBuffer(samplerOutTypes, nil /* rows */, distsqlutils.RowBufferArgs{})
		for len(outputs) > 0 {
			i := rng.Intn(len(outputs))
			row, meta := outputs[i].Next()
			if meta != nil {
				if meta.SamplerProgress == nil {
					t.Fatalf("unexpected metadata: %v", meta)
				}
			} else if row == nil {
				outputs = append(outputs[:i], outputs[i+1:]...)
			} else {
				samplerResults.Push(row, nil /* meta */)
			}
		}

		// Now run the sample aggregator.
		finalOut := distsqlutils.NewRowBuffer([]*types.T{}, nil /* rows */, distsqlutils.RowBufferArgs{})
		spec := &execinfrapb.SampleAggregatorSpec{
			SampleSize:       100,
			Sketches:         sketchSpecs,
			SampledColumnIDs: []sqlbase.ColumnID{100, 101},
			TableID:          13,
		}

		agg, err := newSampleAggregator(
			&flowCtx, 0 /* processorID */, spec, samplerResults, &execinfrapb.PostProcessSpec{}, finalOut,
		)
		if err != nil {
			t.Fatal(err)
		}
		agg.Run(context.Background())

		// Make sure there was no error.
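		// (GetRowsNoMeta fails the test if the output buffer contains any
		// metadata, such as an error.)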
		finalOut.GetRowsNoMeta(t)

		r := sqlutils.MakeSQLRunner(sqlDB)

		rows := r.Query(t, `
		  SELECT "tableID",
		         "name",
		         "columnIDs",
		         "rowCount",
		         "distinctCount",
		         "nullCount",
		         histogram
		  FROM system.table_statistics
		`)
		defer rows.Close()

		type resultBucket struct {
			numEq, numRange, upper int
		}

		type result struct {
			tableID                            int
			name, colIDs                       string
			rowCount, distinctCount, nullCount int
			buckets                            []resultBucket
		}

		expected := []result{
			{
				tableID:       13,
				name:          "a",
				colIDs:        "{100}",
				rowCount:      11,
				distinctCount: 3,
				nullCount:     2,
			},
			{
				tableID:       13,
				name:          "<NULL>",
				colIDs:        "{101}",
				rowCount:      11,
				distinctCount: 9,
				nullCount:     1,
				buckets: []resultBucket{
					{numEq: 2, numRange: 0, upper: 1},
					{numEq: 2, numRange: 1, upper: 3},
					{numEq: 1, numRange: 1, upper: 5},
					{numEq: 1, numRange: 2, upper: 8},
				},
			},
		}

		for _, exp := range expected {
			if !rows.Next() {
				t.Fatal("fewer rows than expected")
			}
			// When the memory limit kicks in, sampling is disabled and no
			// histogram is produced.
			if expectOutOfMemory {
				exp.buckets = nil
			}

			var histData []byte
			var name gosql.NullString
			var r result
			if err := rows.Scan(
				&r.tableID, &name, &r.colIDs, &r.rowCount, &r.distinctCount, &r.nullCount, &histData,
			); err != nil {
				t.Fatal(err)
			}
			if name.Valid {
				r.name = name.String
			} else {
				r.name = "<NULL>"
			}

			if len(histData) > 0 {
				var h stats.HistogramData
				if err := protoutil.Unmarshal(histData, &h); err != nil {
					t.Fatal(err)
				}

				// Histogram upper bounds are stored key-encoded; decode them
				// back into ints for comparison.
				for _, b := range h.Buckets {
					ed, _, err := sqlbase.EncDatumFromBuffer(
						types.Int, sqlbase.DatumEncoding_ASCENDING_KEY, b.UpperBound,
					)
					if err != nil {
						t.Fatal(err)
					}
					var d sqlbase.DatumAlloc
					if err := ed.EnsureDecoded(types.Int, &d); err != nil {
						t.Fatal(err)
					}
					r.buckets = append(r.buckets, resultBucket{
						numEq:    int(b.NumEq),
						numRange: int(b.NumRange),
						upper:    int(*ed.Datum.(*tree.DInt)),
					})
				}
			} else if len(exp.buckets) > 0 {
				t.Error("no histogram")
			}

			if !reflect.DeepEqual(exp, r) {
				t.Errorf("Expected:\n %v\ngot:\n %v", exp, r)
			}
		}
		if rows.Next() {
			t.Fatal("more rows than expected")
		}
	}

	// Run without a memory limit, with limits small enough to be hit
	// (disabling sampling), and with a limit large enough to never be hit.
	runTest(0 /* memLimitBytes */, false /* expectOutOfMemory */)
	runTest(1 /* memLimitBytes */, true /* expectOutOfMemory */)
	runTest(20 /* memLimitBytes */, true /* expectOutOfMemory */)
	runTest(20*1024 /* memLimitBytes */, false /* expectOutOfMemory */)
}