github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/distinct_tmpl.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 // {{/* 12 // +build execgen_template 13 // 14 // This file is the execgen template for distinct.eg.go. It's formatted in a 15 // special way, so it's both valid Go and a valid text/template input. This 16 // permits editing this file with editor support. 17 // 18 // */}} 19 20 package colexec 21 22 import ( 23 "context" 24 25 "github.com/cockroachdb/cockroach/pkg/col/coldata" 26 "github.com/cockroachdb/cockroach/pkg/col/typeconv" 27 "github.com/cockroachdb/cockroach/pkg/sql/colexec/execgen" 28 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 29 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 30 "github.com/cockroachdb/cockroach/pkg/sql/types" 31 "github.com/cockroachdb/errors" 32 ) 33 34 // OrderedDistinctColsToOperators is a utility function that given an input and 35 // a slice of columns, creates a chain of distinct operators and returns the 36 // last distinct operator in that chain as well as its output column. 37 func OrderedDistinctColsToOperators( 38 input colexecbase.Operator, distinctCols []uint32, typs []*types.T, 39 ) (colexecbase.Operator, []bool, error) { 40 distinctCol := make([]bool, coldata.BatchSize()) 41 // zero the boolean column on every iteration. 
42 input = fnOp{ 43 OneInputNode: NewOneInputNode(input), 44 fn: func() { copy(distinctCol, zeroBoolColumn) }, 45 } 46 var ( 47 err error 48 r resettableOperator 49 ok bool 50 ) 51 for i := range distinctCols { 52 input, err = newSingleDistinct(input, int(distinctCols[i]), distinctCol, typs[distinctCols[i]]) 53 if err != nil { 54 return nil, nil, err 55 } 56 } 57 if r, ok = input.(resettableOperator); !ok { 58 colexecerror.InternalError("unexpectedly an ordered distinct is not a resetter") 59 } 60 distinctChain := &distinctChainOps{ 61 resettableOperator: r, 62 } 63 return distinctChain, distinctCol, nil 64 } 65 66 type distinctChainOps struct { 67 resettableOperator 68 } 69 70 var _ resettableOperator = &distinctChainOps{} 71 72 // NewOrderedDistinct creates a new ordered distinct operator on the given 73 // input columns with the given types. 74 func NewOrderedDistinct( 75 input colexecbase.Operator, distinctCols []uint32, typs []*types.T, 76 ) (colexecbase.Operator, error) { 77 op, outputCol, err := OrderedDistinctColsToOperators(input, distinctCols, typs) 78 if err != nil { 79 return nil, err 80 } 81 return &boolVecToSelOp{ 82 OneInputNode: NewOneInputNode(op), 83 outputCol: outputCol, 84 }, nil 85 } 86 87 // Remove unused warning. 88 var _ = execgen.UNSAFEGET 89 90 // {{/* 91 92 // Declarations to make the template compile properly. 93 94 // _GOTYPE is the template variable. 95 type _GOTYPE interface{} 96 97 // _GOTYPESLICE is the template variable. 98 type _GOTYPESLICE interface{} 99 100 // _ASSIGN_NE is the template equality function for assigning the first input 101 // to the result of the second input != the third input. 102 func _ASSIGN_NE(_ bool, _, _, _, _, _ _GOTYPE) bool { 103 colexecerror.InternalError("") 104 } 105 106 // _CANONICAL_TYPE_FAMILY is the template variable. 107 const _CANONICAL_TYPE_FAMILY = types.UnknownFamily 108 109 // _TYPE_WIDTH is the template variable. 
const _TYPE_WIDTH = 0

// */}}

// newSingleDistinct returns a distinct operator over column distinctColIdx of
// input, choosing the implementation that matches the canonical type family
// and the width of t. The returned operator is configured to write its results
// into outputCol. An error is returned when no implementation matches t.
func newSingleDistinct(
	input colexecbase.Operator, distinctColIdx int, outputCol []bool, t *types.T,
) (colexecbase.Operator, error) {
	// The case arms below are repeated by the template directives: the outer
	// range yields one arm per type family, the inner range one arm per width.
	switch typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()) {
	// {{range .}}
	case _CANONICAL_TYPE_FAMILY:
		switch t.Width() {
		// {{range .WidthOverloads}}
		case _TYPE_WIDTH:
			return &distinct_TYPEOp{
				OneInputNode:   NewOneInputNode(input),
				distinctColIdx: distinctColIdx,
				outputCol:      outputCol,
			}, nil
			// {{end}}
		}
		// {{end}}
	}
	// Reached when no family/width arm returned above.
	return nil, errors.Errorf("unsupported distinct type %s", t)
}

// partitioner is a simple implementation of sorted distinct that's useful for
// other operators that need to partition an arbitrarily-sized Vec.
type partitioner interface {
	// partition partitions the input colVec of size n, writing true to the
	// outputCol for every value that differs from the previous one. The first
	// position of outputCol is always set to true.
	partition(colVec coldata.Vec, outputCol []bool, n int)

	// partitionWithOrder is like partition, except it performs the partitioning
	// on the input Vec as if it were ordered via the input order vector, which is
	// a selection vector. The output is written in absolute order, however: the
	// flag at position i of outputCol describes the i-th entry of order. For
	// example, with an input vector [a,b,b] and an order vector [1,2,0], which
	// implies a reordered input vector [b,b,a], the resultant outputCol would be
	// [true, false, true], indicating a distinct value at the 0th and 2nd
	// elements.
	partitionWithOrder(colVec coldata.Vec, order []int, outputCol []bool, n int)
}

// newPartitioner returns a new partitioner on type t.
func newPartitioner(t *types.T) (partitioner, error) {
	// The case arms below are repeated by the template directives: the outer
	// range yields one arm per type family, the inner range one arm per width.
	switch typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()) {
	// {{range .}}
	case _CANONICAL_TYPE_FAMILY:
		switch t.Width() {
		// {{range .WidthOverloads}}
		case _TYPE_WIDTH:
			return partitioner_TYPE{}, nil
			// {{end}}
		}
		// {{end}}
	}
	// Reached when no family/width arm returned above.
	return nil, errors.Errorf("unsupported partition type %s", t)
}

// {{range .}}
// {{range .WidthOverloads}}

// distinct_TYPEOp runs a distinct on the column in distinctColIdx, writing
// true to the resultant bool column for every value that differs from the
// previous one.
type distinct_TYPEOp struct {
	OneInputNode

	// distinctColIdx is the index of the column to distinct upon.
	distinctColIdx int

	// outputCol is the boolean output column. It is shared by all of the
	// other distinct operators in a distinct operator set.
	outputCol []bool

	// Set to true at runtime when we've seen the first row. Distinct always
	// outputs the first row that it sees.
	foundFirstRow bool

	// lastVal is the last value seen by the operator, so that the distincting
	// still works across batch boundaries.
	lastVal _GOTYPE
	// lastValNull records whether the last value seen by the operator was
	// null. It is carried across batches in the same way as lastVal.
	lastValNull bool
}

var _ resettableOperator = &distinct_TYPEOp{}

// Init initializes the input operator; the operator itself needs no setup.
func (p *distinct_TYPEOp) Init() {
	p.input.Init()
}

// reset clears the first-row and last-null state and then resets the input if
// the input implements resetter. lastVal is not cleared: once foundFirstRow is
// false, Next marks the first row it sees unconditionally.
func (p *distinct_TYPEOp) reset(ctx context.Context) {
	p.foundFirstRow = false
	p.lastValNull = false
	// Note: inside this if statement the local variable named resetter shadows
	// the type of the same name.
	if resetter, ok := p.input.(resetter); ok {
		resetter.reset(ctx)
	}
}

// Next fetches the next batch from the input and, unless the batch is empty,
// sets to true the entries of outputCol that correspond to rows whose value in
// column distinctColIdx differs from the value of the preceding row. The
// preceding row of the first row in a batch is the last row of the previous
// batch. Entries of outputCol are only ever set to true here, never cleared.
// The batch is returned as received from the input; results are communicated
// through outputCol only.
func (p *distinct_TYPEOp) Next(ctx context.Context) coldata.Batch {
	batch := p.input.Next(ctx)
	if batch.Length() == 0 {
		// Zero-length batch: pass it through without touching any state.
		return batch
	}
	outputCol := p.outputCol
	vec := batch.ColVec(p.distinctColIdx)
	// nulls stays nil when the vector reports that it cannot contain nulls;
	// this selects the cheaper loops without null handling below.
	var nulls *coldata.Nulls
	if vec.MaybeHasNulls() {
		nulls = vec.Nulls()
	}
	col := vec.TemplateType()

	// We always output the first row.
	// The state is copied into locals for the loops below and written back to
	// the operator at the end.
	lastVal := p.lastVal
	lastValNull := p.lastValNull
	sel := batch.Selection()
	// firstIdx is the physical index of the first row of the batch: sel[0] when
	// a selection vector is present, 0 otherwise.
	firstIdx := 0
	if sel != nil {
		firstIdx = sel[0]
	}
	if !p.foundFirstRow {
		outputCol[firstIdx] = true
		p.foundFirstRow = true
	} else if nulls == nil && lastValNull {
		// The last value of the previous batch was null, so the first value of this
		// non-null batch is distinct.
		// This has to be handled here because the loops used when nulls is nil
		// never look at lastValNull.
		outputCol[firstIdx] = true
		lastValNull = false
	}

	n := batch.Length()
	if sel != nil {
		// Bounds check elimination.
		sel = sel[:n]
		if nulls != nil {
			// With a selection vector, the row index read from col and the index
			// written in outputCol are the same physical index.
			for _, checkIdx := range sel {
				outputIdx := checkIdx
				_CHECK_DISTINCT_WITH_NULLS(checkIdx, outputIdx, lastVal, nulls, lastValNull, col, outputCol)
			}
		} else {
			for _, checkIdx := range sel {
				outputIdx := checkIdx
				_CHECK_DISTINCT(checkIdx, outputIdx, lastVal, col, outputCol)
			}
		}
	} else {
		// Bounds check elimination.
		col = execgen.SLICE(col, 0, n)
		outputCol = outputCol[:n]
		_ = outputCol[n-1]
		if nulls != nil {
			for execgen.RANGE(checkIdx, col, 0, n) {
				outputIdx := checkIdx
				_CHECK_DISTINCT_WITH_NULLS(checkIdx, outputIdx, lastVal, nulls, lastValNull, col, outputCol)
			}
		} else {
			for execgen.RANGE(checkIdx, col, 0, n) {
				outputIdx := checkIdx
				_CHECK_DISTINCT(checkIdx, outputIdx, lastVal, col, outputCol)
			}
		}
	}

	// Persist the state so that the next batch is compared against the last
	// row of this one.
	p.lastVal = lastVal
	p.lastValNull = lastValNull

	return batch
}

// partitioner_TYPE partitions an arbitrary-length colVec by running a distinct
// operation over it. It writes the same format to outputCol that sorted
// distinct does: true for every row that differs from the previous row in the
// input column.
282 type partitioner_TYPE struct{} 283 284 func (p partitioner_TYPE) partitionWithOrder( 285 colVec coldata.Vec, order []int, outputCol []bool, n int, 286 ) { 287 var lastVal _GOTYPE 288 var lastValNull bool 289 var nulls *coldata.Nulls 290 if colVec.MaybeHasNulls() { 291 nulls = colVec.Nulls() 292 } 293 294 col := colVec.TemplateType() 295 col = execgen.SLICE(col, 0, n) 296 outputCol = outputCol[:n] 297 outputCol[0] = true 298 if nulls != nil { 299 for outputIdx, checkIdx := range order { 300 _CHECK_DISTINCT_WITH_NULLS(checkIdx, outputIdx, lastVal, nulls, lastValNull, col, outputCol) 301 } 302 } else { 303 for outputIdx, checkIdx := range order { 304 _CHECK_DISTINCT(checkIdx, outputIdx, lastVal, col, outputCol) 305 } 306 } 307 } 308 309 func (p partitioner_TYPE) partition(colVec coldata.Vec, outputCol []bool, n int) { 310 var ( 311 lastVal _GOTYPE 312 lastValNull bool 313 nulls *coldata.Nulls 314 ) 315 if colVec.MaybeHasNulls() { 316 nulls = colVec.Nulls() 317 } 318 319 col := colVec.TemplateType() 320 col = execgen.SLICE(col, 0, n) 321 outputCol = outputCol[:n] 322 outputCol[0] = true 323 if nulls != nil { 324 for execgen.RANGE(checkIdx, col, 0, n) { 325 outputIdx := checkIdx 326 _CHECK_DISTINCT_WITH_NULLS(checkIdx, outputIdx, lastVal, nulls, lastValNull, col, outputCol) 327 } 328 } else { 329 for execgen.RANGE(checkIdx, col, 0, n) { 330 outputIdx := checkIdx 331 _CHECK_DISTINCT(checkIdx, outputIdx, lastVal, col, outputCol) 332 } 333 } 334 } 335 336 // {{end}} 337 // {{end}} 338 339 // {{/* 340 // _CHECK_DISTINCT retrieves the value at the ith index of col, compares it 341 // to the passed in lastVal, and sets the ith value of outputCol to true if the 342 // compared values were distinct. It presumes that the current batch has no null 343 // values. 
func _CHECK_DISTINCT(
	checkIdx int, outputIdx int, lastVal _GOTYPE, col []_GOTYPE, outputCol []bool,
) { // */}}

	// {{define "checkDistinct" -}}
	// {{with .Global}}
	v := execgen.UNSAFEGET(col, checkIdx)
	var unique bool
	_ASSIGN_NE(unique, v, lastVal, _, col, _)
	// OR with the existing flag so that an entry already marked as distinct is
	// never cleared here.
	outputCol[outputIdx] = outputCol[outputIdx] || unique
	execgen.COPYVAL(lastVal, v)
	// {{end}}
	// {{end}}

	// {{/*
	// The closing brace below belongs to the Go stub only; the surrounding
	// template comment hides it from the template.
} // */}}

// {{/*
// _CHECK_DISTINCT_WITH_NULLS behaves the same as _CHECK_DISTINCT, but it also
// considers whether the previous and current values are null. It assumes that
// `nulls` is non-nil.
//
// A transition between null and non-null (in either direction) marks the
// output entry as distinct, two consecutive nulls do not, and two non-null
// values are compared. lastValNull is updated to the null-ness of the current
// value on every call.
func _CHECK_DISTINCT_WITH_NULLS(
	checkIdx int,
	outputIdx int,
	lastVal _GOTYPE,
	nulls *coldata.Nulls,
	lastValNull bool,
	col []_GOTYPE,
	outputCol []bool,
) { // */}}

	// {{define "checkDistinctWithNulls" -}}
	// {{with .Global}}
	null := nulls.NullAt(checkIdx)
	if null {
		if !lastValNull {
			// The current value is null while the previous was not.
			outputCol[outputIdx] = true
		}
	} else {
		v := execgen.UNSAFEGET(col, checkIdx)
		if lastValNull {
			// The previous value was null while the current is not.
			outputCol[outputIdx] = true
		} else {
			// Neither value is null, so we must compare.
			var unique bool
			_ASSIGN_NE(unique, v, lastVal, _, col, _)
			// OR with the existing flag so that an entry already marked as
			// distinct is never cleared here.
			outputCol[outputIdx] = outputCol[outputIdx] || unique
		}
		execgen.COPYVAL(lastVal, v)
	}
	lastValNull = null
	// {{end}}
	// {{end}}

	// {{/*
	// The closing brace below belongs to the Go stub only; the surrounding
	// template comment hides it from the template.
} // */}}