github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/distinct_test.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"testing"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    24  	"github.com/cockroachdb/cockroach/pkg/testutils/distsqlutils"
    25  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    26  )
    27  
    28  func TestDistinct(t *testing.T) {
    29  	defer leaktest.AfterTest(t)()
    30  
    31  	v := [15]sqlbase.EncDatum{}
    32  	for i := range v {
    33  		v[i] = sqlbase.DatumToEncDatum(types.Int, tree.NewDInt(tree.DInt(i)))
    34  	}
    35  	vNull := sqlbase.DatumToEncDatum(types.Unknown, tree.DNull)
    36  
    37  	testCases := []struct {
    38  		spec     execinfrapb.DistinctSpec
    39  		input    sqlbase.EncDatumRows
    40  		expected sqlbase.EncDatumRows
    41  		error    string
    42  	}{
    43  		{
    44  			spec: execinfrapb.DistinctSpec{
    45  				DistinctColumns: []uint32{0, 1},
    46  			},
    47  			input: sqlbase.EncDatumRows{
    48  				{v[2], v[3], v[1]},
    49  				{v[5], v[6], v[2]},
    50  				{v[2], v[3], v[3]},
    51  				{v[5], v[6], v[4]},
    52  				{v[2], v[6], v[5]},
    53  				{v[3], v[5], v[6]},
    54  				{v[2], v[9], v[7]},
    55  			},
    56  			expected: sqlbase.EncDatumRows{
    57  				{v[2], v[3], v[1]},
    58  				{v[5], v[6], v[2]},
    59  				{v[2], v[6], v[5]},
    60  				{v[3], v[5], v[6]},
    61  				{v[2], v[9], v[7]},
    62  			},
    63  		},
    64  		{
    65  			spec: execinfrapb.DistinctSpec{
    66  				OrderedColumns:  []uint32{1},
    67  				DistinctColumns: []uint32{0, 1},
    68  			},
    69  			input: sqlbase.EncDatumRows{
    70  				{v[2], v[3], v[1]},
    71  				{v[2], v[3], v[2]},
    72  				{v[2], v[6], v[3]},
    73  				{v[2], v[9], v[4]},
    74  				{v[3], v[5], v[5]},
    75  				{v[5], v[6], v[6]},
    76  				{v[5], v[6], v[7]},
    77  			},
    78  			expected: sqlbase.EncDatumRows{
    79  				{v[2], v[3], v[1]},
    80  				{v[2], v[6], v[3]},
    81  				{v[2], v[9], v[4]},
    82  				{v[3], v[5], v[5]},
    83  				{v[5], v[6], v[6]},
    84  			},
    85  		},
    86  		{
    87  			spec: execinfrapb.DistinctSpec{
    88  				OrderedColumns:  []uint32{1},
    89  				DistinctColumns: []uint32{1},
    90  			},
    91  			input: sqlbase.EncDatumRows{
    92  				{v[2], v[3], v[1]},
    93  				{v[2], v[3], v[2]},
    94  				{v[2], v[6], v[3]},
    95  				{v[2], v[9], v[4]},
    96  				{v[3], v[5], v[5]},
    97  				{v[5], v[6], v[6]},
    98  				{v[6], v[6], v[7]},
    99  				{v[7], v[6], v[8]},
   100  			},
   101  			expected: sqlbase.EncDatumRows{
   102  				{v[2], v[3], v[1]},
   103  				{v[2], v[6], v[3]},
   104  				{v[2], v[9], v[4]},
   105  				{v[3], v[5], v[5]},
   106  				{v[5], v[6], v[6]},
   107  			},
   108  		},
   109  		{
   110  			spec: execinfrapb.DistinctSpec{
   111  				OrderedColumns:  []uint32{1},
   112  				DistinctColumns: []uint32{1},
   113  			},
   114  			input: sqlbase.EncDatumRows{
   115  				{v[2], v[3], v[1]},
   116  				{v[2], v[3], v[2]},
   117  				{v[2], v[6], v[3]},
   118  				{v[2], v[9], v[4]},
   119  				{v[3], v[5], v[5]},
   120  				{v[5], v[6], v[6]},
   121  				{v[6], v[6], v[7]},
   122  				{v[7], v[6], v[8]},
   123  			},
   124  			expected: sqlbase.EncDatumRows{
   125  				{v[2], v[3], v[1]},
   126  				{v[2], v[6], v[3]},
   127  				{v[2], v[9], v[4]},
   128  				{v[3], v[5], v[5]},
   129  				{v[5], v[6], v[6]},
   130  			},
   131  		},
   132  
   133  		// Test NullsAreDistinct flag (not ordered).
   134  		{
   135  			spec: execinfrapb.DistinctSpec{
   136  				DistinctColumns:  []uint32{0, 1},
   137  				NullsAreDistinct: false,
   138  			},
   139  			input: sqlbase.EncDatumRows{
   140  				{v[1], v[2], v[1]},
   141  				{vNull, vNull, v[2]},
   142  				{v[1], v[2], v[3]},
   143  				{vNull, vNull, v[4]},
   144  				{v[1], vNull, v[5]},
   145  				{vNull, v[2], v[6]},
   146  				{vNull, v[2], v[7]},
   147  				{v[1], vNull, v[8]},
   148  			},
   149  			expected: sqlbase.EncDatumRows{
   150  				{v[1], v[2], v[1]},
   151  				{vNull, vNull, v[2]},
   152  				{v[1], vNull, v[5]},
   153  				{vNull, v[2], v[6]},
   154  			},
   155  		},
   156  		{
   157  			spec: execinfrapb.DistinctSpec{
   158  				DistinctColumns:  []uint32{0, 1},
   159  				NullsAreDistinct: true,
   160  			},
   161  			input: sqlbase.EncDatumRows{
   162  				{v[1], v[2], v[1]},
   163  				{vNull, vNull, v[2]},
   164  				{v[1], v[2], v[3]},
   165  				{vNull, vNull, v[4]},
   166  				{v[1], vNull, v[5]},
   167  				{vNull, v[2], v[6]},
   168  				{vNull, v[2], v[7]},
   169  				{v[1], vNull, v[8]},
   170  			},
   171  			expected: sqlbase.EncDatumRows{
   172  				{v[1], v[2], v[1]},
   173  				{vNull, vNull, v[2]},
   174  				{vNull, vNull, v[4]},
   175  				{v[1], vNull, v[5]},
   176  				{vNull, v[2], v[6]},
   177  				{vNull, v[2], v[7]},
   178  				{v[1], vNull, v[8]},
   179  			},
   180  		},
   181  
   182  		// Test NullsAreDistinct flag (ordered).
   183  		{
   184  			spec: execinfrapb.DistinctSpec{
   185  				OrderedColumns:   []uint32{0},
   186  				DistinctColumns:  []uint32{0, 1},
   187  				NullsAreDistinct: false,
   188  			},
   189  			input: sqlbase.EncDatumRows{
   190  				{vNull, v[2], v[1]},
   191  				{vNull, vNull, v[2]},
   192  				{vNull, v[2], v[3]},
   193  				{vNull, vNull, v[4]},
   194  				{v[1], vNull, v[5]},
   195  				{v[1], v[2], v[6]},
   196  				{v[1], vNull, v[7]},
   197  				{v[1], v[2], v[8]},
   198  			},
   199  			expected: sqlbase.EncDatumRows{
   200  				{vNull, v[2], v[1]},
   201  				{vNull, vNull, v[2]},
   202  				{v[1], vNull, v[5]},
   203  				{v[1], v[2], v[6]},
   204  			},
   205  		},
   206  		{
   207  			spec: execinfrapb.DistinctSpec{
   208  				OrderedColumns:   []uint32{0},
   209  				DistinctColumns:  []uint32{0, 1},
   210  				NullsAreDistinct: true,
   211  			},
   212  			input: sqlbase.EncDatumRows{
   213  				{vNull, v[2], v[1]},
   214  				{vNull, vNull, v[2]},
   215  				{vNull, v[2], v[3]},
   216  				{vNull, vNull, v[4]},
   217  				{v[1], vNull, v[5]},
   218  				{v[1], v[2], v[6]},
   219  				{v[1], vNull, v[7]},
   220  				{v[1], v[2], v[8]},
   221  			},
   222  			expected: sqlbase.EncDatumRows{
   223  				{vNull, v[2], v[1]},
   224  				{vNull, vNull, v[2]},
   225  				{vNull, v[2], v[3]},
   226  				{vNull, vNull, v[4]},
   227  				{v[1], vNull, v[5]},
   228  				{v[1], v[2], v[6]},
   229  				{v[1], vNull, v[7]},
   230  			},
   231  		},
   232  
   233  		// Test ErrorOnDup flag (ordered).
   234  		{
   235  			spec: execinfrapb.DistinctSpec{
   236  				OrderedColumns:  []uint32{0},
   237  				DistinctColumns: []uint32{0, 1},
   238  				ErrorOnDup:      "duplicate rows",
   239  			},
   240  			input: sqlbase.EncDatumRows{
   241  				{v[1], v[2], v[1]},
   242  				{v[2], v[3], v[2]},
   243  				{v[2], v[3], v[3]},
   244  				{v[3], v[4], v[4]},
   245  			},
   246  			error: "duplicate rows",
   247  		},
   248  
   249  		// Test ErrorOnDup flag (unordered).
   250  		{
   251  			spec: execinfrapb.DistinctSpec{
   252  				DistinctColumns: []uint32{0, 1},
   253  				ErrorOnDup:      "duplicate rows",
   254  			},
   255  			input: sqlbase.EncDatumRows{
   256  				{v[2], v[3], v[1]},
   257  				{v[1], v[2], v[2]},
   258  				{v[3], v[4], v[3]},
   259  				{v[2], v[3], v[4]},
   260  			},
   261  			error: "duplicate rows",
   262  		},
   263  	}
   264  
   265  	for _, c := range testCases {
   266  		t.Run("", func(t *testing.T) {
   267  			ds := c.spec
   268  
   269  			in := distsqlutils.NewRowBuffer(sqlbase.ThreeIntCols, c.input, distsqlutils.RowBufferArgs{})
   270  			out := &distsqlutils.RowBuffer{}
   271  
   272  			st := cluster.MakeTestingClusterSettings()
   273  			evalCtx := tree.MakeTestingEvalContext(st)
   274  			defer evalCtx.Stop(context.Background())
   275  			flowCtx := execinfra.FlowCtx{
   276  				Cfg:     &execinfra.ServerConfig{Settings: st},
   277  				EvalCtx: &evalCtx,
   278  			}
   279  
   280  			d, err := newDistinct(&flowCtx, 0 /* processorID */, &ds, in, &execinfrapb.PostProcessSpec{}, out)
   281  			if err != nil {
   282  				t.Fatal(err)
   283  			}
   284  
   285  			d.Run(context.Background())
   286  			if !out.ProducerClosed() {
   287  				t.Fatalf("output RowReceiver not closed")
   288  			}
   289  			var res sqlbase.EncDatumRows
   290  			for {
   291  				row, meta := out.Next()
   292  				if meta != nil {
   293  					err = meta.Err
   294  					break
   295  				}
   296  				if row == nil {
   297  					break
   298  				}
   299  				res = append(res, row.Copy())
   300  			}
   301  
   302  			if c.error != "" {
   303  				if err == nil || err.Error() != c.error {
   304  					t.Errorf("expected error: %v, got %v", c.error, err)
   305  				}
   306  			} else {
   307  				if result := res.String(sqlbase.ThreeIntCols); result != c.expected.String(sqlbase.ThreeIntCols) {
   308  					t.Errorf("invalid results: %v, expected %v'", result, c.expected.String(sqlbase.ThreeIntCols))
   309  				}
   310  			}
   311  		})
   312  	}
   313  }
   314  
   315  func benchmarkDistinct(b *testing.B, orderedColumns []uint32) {
   316  	const numCols = 2
   317  
   318  	ctx := context.Background()
   319  	st := cluster.MakeTestingClusterSettings()
   320  	evalCtx := tree.MakeTestingEvalContext(st)
   321  	defer evalCtx.Stop(ctx)
   322  
   323  	flowCtx := &execinfra.FlowCtx{
   324  		Cfg:     &execinfra.ServerConfig{Settings: st},
   325  		EvalCtx: &evalCtx,
   326  	}
   327  	spec := &execinfrapb.DistinctSpec{
   328  		DistinctColumns: []uint32{0, 1},
   329  	}
   330  	spec.OrderedColumns = orderedColumns
   331  
   332  	post := &execinfrapb.PostProcessSpec{}
   333  	for _, numRows := range []int{1 << 4, 1 << 8, 1 << 12, 1 << 16} {
   334  		b.Run(fmt.Sprintf("rows=%d", numRows), func(b *testing.B) {
   335  			input := execinfra.NewRepeatableRowSource(sqlbase.TwoIntCols, sqlbase.MakeIntRows(numRows, numCols))
   336  
   337  			b.SetBytes(int64(8 * numRows * numCols))
   338  			b.ResetTimer()
   339  			for i := 0; i < b.N; i++ {
   340  				d, err := newDistinct(flowCtx, 0 /* processorID */, spec, input, post, &rowDisposer{})
   341  				if err != nil {
   342  					b.Fatal(err)
   343  				}
   344  				d.Run(context.Background())
   345  				input.Reset()
   346  			}
   347  		})
   348  	}
   349  }
   350  
   351  func BenchmarkOrderedDistinct(b *testing.B) {
   352  	benchmarkDistinct(b, []uint32{0, 1})
   353  }
   354  
   355  func BenchmarkPartiallyOrderedDistinct(b *testing.B) {
   356  	benchmarkDistinct(b, []uint32{0})
   357  }
   358  
   359  func BenchmarkUnorderedDistinct(b *testing.B) {
   360  	benchmarkDistinct(b, []uint32{})
   361  }