github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/distinct_test.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowexec 12 13 import ( 14 "context" 15 "fmt" 16 "testing" 17 18 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 19 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 20 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 21 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 22 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 23 "github.com/cockroachdb/cockroach/pkg/sql/types" 24 "github.com/cockroachdb/cockroach/pkg/testutils/distsqlutils" 25 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 26 ) 27 28 func TestDistinct(t *testing.T) { 29 defer leaktest.AfterTest(t)() 30 31 v := [15]sqlbase.EncDatum{} 32 for i := range v { 33 v[i] = sqlbase.DatumToEncDatum(types.Int, tree.NewDInt(tree.DInt(i))) 34 } 35 vNull := sqlbase.DatumToEncDatum(types.Unknown, tree.DNull) 36 37 testCases := []struct { 38 spec execinfrapb.DistinctSpec 39 input sqlbase.EncDatumRows 40 expected sqlbase.EncDatumRows 41 error string 42 }{ 43 { 44 spec: execinfrapb.DistinctSpec{ 45 DistinctColumns: []uint32{0, 1}, 46 }, 47 input: sqlbase.EncDatumRows{ 48 {v[2], v[3], v[1]}, 49 {v[5], v[6], v[2]}, 50 {v[2], v[3], v[3]}, 51 {v[5], v[6], v[4]}, 52 {v[2], v[6], v[5]}, 53 {v[3], v[5], v[6]}, 54 {v[2], v[9], v[7]}, 55 }, 56 expected: sqlbase.EncDatumRows{ 57 {v[2], v[3], v[1]}, 58 {v[5], v[6], v[2]}, 59 {v[2], v[6], v[5]}, 60 {v[3], v[5], v[6]}, 61 {v[2], v[9], v[7]}, 62 }, 63 }, 64 { 65 spec: execinfrapb.DistinctSpec{ 66 OrderedColumns: []uint32{1}, 67 DistinctColumns: []uint32{0, 1}, 68 }, 69 input: sqlbase.EncDatumRows{ 70 {v[2], v[3], v[1]}, 71 {v[2], v[3], v[2]}, 72 {v[2], v[6], v[3]}, 73 {v[2], v[9], v[4]}, 74 {v[3], v[5], v[5]}, 75 {v[5], v[6], v[6]}, 76 {v[5], v[6], v[7]}, 77 }, 78 expected: sqlbase.EncDatumRows{ 79 {v[2], v[3], v[1]}, 80 {v[2], v[6], v[3]}, 81 {v[2], v[9], v[4]}, 82 {v[3], v[5], v[5]}, 83 {v[5], v[6], v[6]}, 84 }, 85 }, 86 { 87 spec: execinfrapb.DistinctSpec{ 88 OrderedColumns: []uint32{1}, 89 DistinctColumns: []uint32{1}, 90 }, 91 input: sqlbase.EncDatumRows{ 92 {v[2], v[3], v[1]}, 93 {v[2], v[3], v[2]}, 94 {v[2], v[6], v[3]}, 95 {v[2], v[9], v[4]}, 96 {v[3], v[5], v[5]}, 97 {v[5], v[6], v[6]}, 98 {v[6], v[6], v[7]}, 99 {v[7], v[6], v[8]}, 100 }, 101 expected: sqlbase.EncDatumRows{ 102 {v[2], v[3], v[1]}, 103 {v[2], v[6], v[3]}, 104 {v[2], v[9], v[4]}, 105 {v[3], v[5], v[5]}, 106 {v[5], v[6], v[6]}, 107 }, 108 }, 109 { 110 spec: execinfrapb.DistinctSpec{ 111 OrderedColumns: []uint32{1}, 112 DistinctColumns: []uint32{1}, 113 }, 114 input: sqlbase.EncDatumRows{ 115 {v[2], v[3], v[1]}, 116 {v[2], v[3], v[2]}, 117 {v[2], v[6], v[3]}, 118 {v[2], v[9], v[4]}, 119 {v[3], v[5], v[5]}, 120 {v[5], v[6], v[6]}, 121 {v[6], v[6], v[7]}, 122 {v[7], v[6], v[8]}, 123 }, 124 expected: sqlbase.EncDatumRows{ 125 {v[2], v[3], v[1]}, 126 {v[2], v[6], v[3]}, 127 {v[2], v[9], v[4]}, 128 {v[3], v[5], v[5]}, 129 {v[5], v[6], v[6]}, 130 }, 131 }, 132 133 // Test NullsAreDistinct flag (not ordered). 134 { 135 spec: execinfrapb.DistinctSpec{ 136 DistinctColumns: []uint32{0, 1}, 137 NullsAreDistinct: false, 138 }, 139 input: sqlbase.EncDatumRows{ 140 {v[1], v[2], v[1]}, 141 {vNull, vNull, v[2]}, 142 {v[1], v[2], v[3]}, 143 {vNull, vNull, v[4]}, 144 {v[1], vNull, v[5]}, 145 {vNull, v[2], v[6]}, 146 {vNull, v[2], v[7]}, 147 {v[1], vNull, v[8]}, 148 }, 149 expected: sqlbase.EncDatumRows{ 150 {v[1], v[2], v[1]}, 151 {vNull, vNull, v[2]}, 152 {v[1], vNull, v[5]}, 153 {vNull, v[2], v[6]}, 154 }, 155 }, 156 { 157 spec: execinfrapb.DistinctSpec{ 158 DistinctColumns: []uint32{0, 1}, 159 NullsAreDistinct: true, 160 }, 161 input: sqlbase.EncDatumRows{ 162 {v[1], v[2], v[1]}, 163 {vNull, vNull, v[2]}, 164 {v[1], v[2], v[3]}, 165 {vNull, vNull, v[4]}, 166 {v[1], vNull, v[5]}, 167 {vNull, v[2], v[6]}, 168 {vNull, v[2], v[7]}, 169 {v[1], vNull, v[8]}, 170 }, 171 expected: sqlbase.EncDatumRows{ 172 {v[1], v[2], v[1]}, 173 {vNull, vNull, v[2]}, 174 {vNull, vNull, v[4]}, 175 {v[1], vNull, v[5]}, 176 {vNull, v[2], v[6]}, 177 {vNull, v[2], v[7]}, 178 {v[1], vNull, v[8]}, 179 }, 180 }, 181 182 // Test NullsAreDistinct flag (ordered). 183 { 184 spec: execinfrapb.DistinctSpec{ 185 OrderedColumns: []uint32{0}, 186 DistinctColumns: []uint32{0, 1}, 187 NullsAreDistinct: false, 188 }, 189 input: sqlbase.EncDatumRows{ 190 {vNull, v[2], v[1]}, 191 {vNull, vNull, v[2]}, 192 {vNull, v[2], v[3]}, 193 {vNull, vNull, v[4]}, 194 {v[1], vNull, v[5]}, 195 {v[1], v[2], v[6]}, 196 {v[1], vNull, v[7]}, 197 {v[1], v[2], v[8]}, 198 }, 199 expected: sqlbase.EncDatumRows{ 200 {vNull, v[2], v[1]}, 201 {vNull, vNull, v[2]}, 202 {v[1], vNull, v[5]}, 203 {v[1], v[2], v[6]}, 204 }, 205 }, 206 { 207 spec: execinfrapb.DistinctSpec{ 208 OrderedColumns: []uint32{0}, 209 DistinctColumns: []uint32{0, 1}, 210 NullsAreDistinct: true, 211 }, 212 input: sqlbase.EncDatumRows{ 213 {vNull, v[2], v[1]}, 214 {vNull, vNull, v[2]}, 215 {vNull, v[2], v[3]}, 216 {vNull, vNull, v[4]}, 217 {v[1], vNull, v[5]}, 218 {v[1], v[2], v[6]}, 219 {v[1], vNull, v[7]}, 220 {v[1], v[2], v[8]}, 221 }, 222 expected: sqlbase.EncDatumRows{ 223 {vNull, v[2], v[1]}, 224 {vNull, vNull, v[2]}, 225 {vNull, v[2], v[3]}, 226 {vNull, vNull, v[4]}, 227 {v[1], vNull, v[5]}, 228 {v[1], v[2], v[6]}, 229 {v[1], vNull, v[7]}, 230 }, 231 }, 232 233 // Test ErrorOnDup flag (ordered). 234 { 235 spec: execinfrapb.DistinctSpec{ 236 OrderedColumns: []uint32{0}, 237 DistinctColumns: []uint32{0, 1}, 238 ErrorOnDup: "duplicate rows", 239 }, 240 input: sqlbase.EncDatumRows{ 241 {v[1], v[2], v[1]}, 242 {v[2], v[3], v[2]}, 243 {v[2], v[3], v[3]}, 244 {v[3], v[4], v[4]}, 245 }, 246 error: "duplicate rows", 247 }, 248 249 // Test ErrorOnDup flag (unordered). 250 { 251 spec: execinfrapb.DistinctSpec{ 252 DistinctColumns: []uint32{0, 1}, 253 ErrorOnDup: "duplicate rows", 254 }, 255 input: sqlbase.EncDatumRows{ 256 {v[2], v[3], v[1]}, 257 {v[1], v[2], v[2]}, 258 {v[3], v[4], v[3]}, 259 {v[2], v[3], v[4]}, 260 }, 261 error: "duplicate rows", 262 }, 263 } 264 265 for _, c := range testCases { 266 t.Run("", func(t *testing.T) { 267 ds := c.spec 268 269 in := distsqlutils.NewRowBuffer(sqlbase.ThreeIntCols, c.input, distsqlutils.RowBufferArgs{}) 270 out := &distsqlutils.RowBuffer{} 271 272 st := cluster.MakeTestingClusterSettings() 273 evalCtx := tree.MakeTestingEvalContext(st) 274 defer evalCtx.Stop(context.Background()) 275 flowCtx := execinfra.FlowCtx{ 276 Cfg: &execinfra.ServerConfig{Settings: st}, 277 EvalCtx: &evalCtx, 278 } 279 280 d, err := newDistinct(&flowCtx, 0 /* processorID */, &ds, in, &execinfrapb.PostProcessSpec{}, out) 281 if err != nil { 282 t.Fatal(err) 283 } 284 285 d.Run(context.Background()) 286 if !out.ProducerClosed() { 287 t.Fatalf("output RowReceiver not closed") 288 } 289 var res sqlbase.EncDatumRows 290 for { 291 row, meta := out.Next() 292 if meta != nil { 293 err = meta.Err 294 break 295 } 296 if row == nil { 297 break 298 } 299 res = append(res, row.Copy()) 300 } 301 302 if c.error != "" { 303 if err == nil || err.Error() != c.error { 304 t.Errorf("expected error: %v, got %v", c.error, err) 305 } 306 } else { 307 if result := res.String(sqlbase.ThreeIntCols); result != c.expected.String(sqlbase.ThreeIntCols) { 308 t.Errorf("invalid results: %v, expected %v'", result, c.expected.String(sqlbase.ThreeIntCols)) 309 } 310 } 311 }) 312 } 313 } 314 315 func benchmarkDistinct(b *testing.B, orderedColumns []uint32) { 316 const numCols = 2 317 318 ctx := context.Background() 319 st := cluster.MakeTestingClusterSettings() 320 evalCtx := tree.MakeTestingEvalContext(st) 321 defer evalCtx.Stop(ctx) 322 323 flowCtx := &execinfra.FlowCtx{ 324 Cfg: &execinfra.ServerConfig{Settings: st}, 325 EvalCtx: &evalCtx, 326 } 327 spec := &execinfrapb.DistinctSpec{ 328 DistinctColumns: []uint32{0, 1}, 329 } 330 spec.OrderedColumns = orderedColumns 331 332 post := &execinfrapb.PostProcessSpec{} 333 for _, numRows := range []int{1 << 4, 1 << 8, 1 << 12, 1 << 16} { 334 b.Run(fmt.Sprintf("rows=%d", numRows), func(b *testing.B) { 335 input := execinfra.NewRepeatableRowSource(sqlbase.TwoIntCols, sqlbase.MakeIntRows(numRows, numCols)) 336 337 b.SetBytes(int64(8 * numRows * numCols)) 338 b.ResetTimer() 339 for i := 0; i < b.N; i++ { 340 d, err := newDistinct(flowCtx, 0 /* processorID */, spec, input, post, &rowDisposer{}) 341 if err != nil { 342 b.Fatal(err) 343 } 344 d.Run(context.Background()) 345 input.Reset() 346 } 347 }) 348 } 349 } 350 351 func BenchmarkOrderedDistinct(b *testing.B) { 352 benchmarkDistinct(b, []uint32{0, 1}) 353 } 354 355 func BenchmarkPartiallyOrderedDistinct(b *testing.B) { 356 benchmarkDistinct(b, []uint32{0}) 357 } 358 359 func BenchmarkUnorderedDistinct(b *testing.B) { 360 benchmarkDistinct(b, []uint32{}) 361 }