github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sample_aggregator_test.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  	gosql "database/sql"
    16  	"reflect"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/base"
    20  	"github.com/cockroachdb/cockroach/pkg/gossip"
    21  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/stats"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    29  	"github.com/cockroachdb/cockroach/pkg/testutils/distsqlutils"
    30  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    31  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    32  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    33  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    34  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    35  )
    36  
    37  func TestSampleAggregator(t *testing.T) {
    38  	defer leaktest.AfterTest(t)()
    39  
    40  	server, sqlDB, kvDB := serverutils.StartServer(t, base.TestServerArgs{})
    41  	defer server.Stopper().Stop(context.Background())
    42  
    43  	st := cluster.MakeTestingClusterSettings()
    44  	evalCtx := tree.MakeTestingEvalContext(st)
    45  	defer evalCtx.Stop(context.Background())
    46  
    47  	runTest := func(memLimitBytes int64, expectOutOfMemory bool) {
    48  		flowCtx := execinfra.FlowCtx{
    49  			EvalCtx: &evalCtx,
    50  			Cfg: &execinfra.ServerConfig{
    51  				Settings: st,
    52  				DB:       kvDB,
    53  				Executor: server.InternalExecutor().(sqlutil.InternalExecutor),
    54  				Gossip:   gossip.MakeExposedGossip(server.GossipI().(*gossip.Gossip)),
    55  			},
    56  		}
    57  		// Override the default memory limit. If memLimitBytes is small but
    58  		// non-zero, the processor will hit this limit and disable sampling.
    59  		flowCtx.Cfg.TestingKnobs.MemoryLimitBytes = memLimitBytes
    60  
    61  		inputRows := [][]int{
    62  			{-1, 1},
    63  			{1, 1},
    64  			{2, 2},
    65  			{1, 3},
    66  			{2, 4},
    67  			{1, 5},
    68  			{2, 6},
    69  			{1, 7},
    70  			{2, 8},
    71  			{-1, 3},
    72  			{1, -1},
    73  		}
    74  
    75  		// We randomly distribute the input rows between multiple Samplers and
    76  		// aggregate the results.
    77  		numSamplers := 3
    78  
    79  		samplerOutTypes := []*types.T{
    80  			types.Int,   // original column
    81  			types.Int,   // original column
    82  			types.Int,   // rank
    83  			types.Int,   // sketch index
    84  			types.Int,   // num rows
    85  			types.Int,   // null vals
    86  			types.Bytes, // sketch data
    87  		}
    88  
    89  		sketchSpecs := []execinfrapb.SketchSpec{
    90  			{
    91  				SketchType:        execinfrapb.SketchType_HLL_PLUS_PLUS_V1,
    92  				Columns:           []uint32{0},
    93  				GenerateHistogram: false,
    94  				StatName:          "a",
    95  			},
    96  			{
    97  				SketchType:          execinfrapb.SketchType_HLL_PLUS_PLUS_V1,
    98  				Columns:             []uint32{1},
    99  				GenerateHistogram:   true,
   100  				HistogramMaxBuckets: 4,
   101  			},
   102  		}
   103  
   104  		rng, _ := randutil.NewPseudoRand()
   105  		rowPartitions := make([][][]int, numSamplers)
   106  		for _, row := range inputRows {
   107  			j := rng.Intn(numSamplers)
   108  			rowPartitions[j] = append(rowPartitions[j], row)
   109  		}
   110  
   111  		outputs := make([]*distsqlutils.RowBuffer, numSamplers)
   112  		for i := 0; i < numSamplers; i++ {
   113  			rows := sqlbase.GenEncDatumRowsInt(rowPartitions[i])
   114  			in := distsqlutils.NewRowBuffer(sqlbase.TwoIntCols, rows, distsqlutils.RowBufferArgs{})
   115  			outputs[i] = distsqlutils.NewRowBuffer(samplerOutTypes, nil /* rows */, distsqlutils.RowBufferArgs{})
   116  
   117  			spec := &execinfrapb.SamplerSpec{SampleSize: 100, Sketches: sketchSpecs}
   118  			p, err := newSamplerProcessor(
   119  				&flowCtx, 0 /* processorID */, spec, in, &execinfrapb.PostProcessSpec{}, outputs[i],
   120  			)
   121  			if err != nil {
   122  				t.Fatal(err)
   123  			}
   124  			p.Run(context.Background())
   125  		}
   126  		// Randomly interleave the output rows from the samplers into a single buffer.
   127  		samplerResults := distsqlutils.NewRowBuffer(samplerOutTypes, nil /* rows */, distsqlutils.RowBufferArgs{})
   128  		for len(outputs) > 0 {
   129  			i := rng.Intn(len(outputs))
   130  			row, meta := outputs[i].Next()
   131  			if meta != nil {
   132  				if meta.SamplerProgress == nil {
   133  					t.Fatalf("unexpected metadata: %v", meta)
   134  				}
   135  			} else if row == nil {
   136  				outputs = append(outputs[:i], outputs[i+1:]...)
   137  			} else {
   138  				samplerResults.Push(row, nil /* meta */)
   139  			}
   140  		}
   141  
   142  		// Now run the sample aggregator.
   143  		finalOut := distsqlutils.NewRowBuffer([]*types.T{}, nil /* rows*/, distsqlutils.RowBufferArgs{})
   144  		spec := &execinfrapb.SampleAggregatorSpec{
   145  			SampleSize:       100,
   146  			Sketches:         sketchSpecs,
   147  			SampledColumnIDs: []sqlbase.ColumnID{100, 101},
   148  			TableID:          13,
   149  		}
   150  
   151  		agg, err := newSampleAggregator(
   152  			&flowCtx, 0 /* processorID */, spec, samplerResults, &execinfrapb.PostProcessSpec{}, finalOut,
   153  		)
   154  		if err != nil {
   155  			t.Fatal(err)
   156  		}
   157  		agg.Run(context.Background())
   158  		// Make sure there was no error.
   159  		finalOut.GetRowsNoMeta(t)
   160  		r := sqlutils.MakeSQLRunner(sqlDB)
   161  
   162  		rows := r.Query(t, `
   163  	  SELECT "tableID",
   164  					 "name",
   165  					 "columnIDs",
   166  					 "rowCount",
   167  					 "distinctCount",
   168  					 "nullCount",
   169  					 histogram
   170  	  FROM system.table_statistics
   171    `)
   172  		defer rows.Close()
   173  
   174  		type resultBucket struct {
   175  			numEq, numRange, upper int
   176  		}
   177  
   178  		type result struct {
   179  			tableID                            int
   180  			name, colIDs                       string
   181  			rowCount, distinctCount, nullCount int
   182  			buckets                            []resultBucket
   183  		}
   184  
   185  		expected := []result{
   186  			{
   187  				tableID:       13,
   188  				name:          "a",
   189  				colIDs:        "{100}",
   190  				rowCount:      11,
   191  				distinctCount: 3,
   192  				nullCount:     2,
   193  			},
   194  			{
   195  				tableID:       13,
   196  				name:          "<NULL>",
   197  				colIDs:        "{101}",
   198  				rowCount:      11,
   199  				distinctCount: 9,
   200  				nullCount:     1,
   201  				buckets: []resultBucket{
   202  					{numEq: 2, numRange: 0, upper: 1},
   203  					{numEq: 2, numRange: 1, upper: 3},
   204  					{numEq: 1, numRange: 1, upper: 5},
   205  					{numEq: 1, numRange: 2, upper: 8},
   206  				},
   207  			},
   208  		}
   209  
   210  		for _, exp := range expected {
   211  			if !rows.Next() {
   212  				t.Fatal("fewer rows than expected")
   213  			}
   214  			if expectOutOfMemory {
   215  				exp.buckets = nil
   216  			}
   217  
   218  			var histData []byte
   219  			var name gosql.NullString
   220  			var r result
   221  			if err := rows.Scan(
   222  				&r.tableID, &name, &r.colIDs, &r.rowCount, &r.distinctCount, &r.nullCount, &histData,
   223  			); err != nil {
   224  				t.Fatal(err)
   225  			}
   226  			if name.Valid {
   227  				r.name = name.String
   228  			} else {
   229  				r.name = "<NULL>"
   230  			}
   231  
   232  			if len(histData) > 0 {
   233  				var h stats.HistogramData
   234  				if err := protoutil.Unmarshal(histData, &h); err != nil {
   235  					t.Fatal(err)
   236  				}
   237  
   238  				for _, b := range h.Buckets {
   239  					ed, _, err := sqlbase.EncDatumFromBuffer(
   240  						types.Int, sqlbase.DatumEncoding_ASCENDING_KEY, b.UpperBound,
   241  					)
   242  					if err != nil {
   243  						t.Fatal(err)
   244  					}
   245  					var d sqlbase.DatumAlloc
   246  					if err := ed.EnsureDecoded(types.Int, &d); err != nil {
   247  						t.Fatal(err)
   248  					}
   249  					r.buckets = append(r.buckets, resultBucket{
   250  						numEq:    int(b.NumEq),
   251  						numRange: int(b.NumRange),
   252  						upper:    int(*ed.Datum.(*tree.DInt)),
   253  					})
   254  				}
   255  			} else if len(exp.buckets) > 0 {
   256  				t.Error("no histogram")
   257  			}
   258  
   259  			if !reflect.DeepEqual(exp, r) {
   260  				t.Errorf("Expected:\n  %v\ngot:\n  %v", exp, r)
   261  			}
   262  		}
   263  		if rows.Next() {
   264  			t.Fatal("more rows than expected")
   265  		}
   266  	}
   267  
   268  	runTest(0 /* memLimitBytes */, false /* expectOutOfMemory */)
   269  	runTest(1 /* memLimitBytes */, true /* expectOutOfMemory */)
   270  	runTest(20 /* memLimitBytes */, true /* expectOutOfMemory */)
   271  	runTest(20*1024 /* memLimitBytes */, false /* expectOutOfMemory */)
   272  }