github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/simple_project.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    18  	"github.com/cockroachdb/cockroach/pkg/util/log"
    19  )
    20  
// simpleProjectOp is an operator that implements "simple projection" - removal of
// columns that aren't needed by later operators. Each batch coming from the
// input is wrapped in a projectingBatch (see Next) rather than being copied.
type simpleProjectOp struct {
	OneInputNode
	NonExplainable
	closerHelper

	// projection holds the indices of the input columns to keep, in output
	// order. batches maps every distinct input batch seen by Next to the
	// projectingBatch wrapper created for it, so that the wrapper is reused
	// whenever the input returns the same batch again.
	projection []uint32
	batches    map[coldata.Batch]*projectingBatch
	// numBatchesLoggingThreshold is the threshold on the number of items in
	// 'batches' map at which we will log a message when a new projectingBatch
	// is created. It is growing exponentially (it is doubled each time the
	// threshold is reached).
	numBatchesLoggingThreshold int
}
    35  
// Compile-time assertion that *simpleProjectOp satisfies closableOperator.
var _ closableOperator = &simpleProjectOp{}
    37  
// projectingBatch is a Batch that applies a simple projection to another,
// underlying batch, discarding all columns but the ones in its projection
// slice, in order.
type projectingBatch struct {
	// Batch is the underlying batch being projected. Any Batch method that
	// projectingBatch does not define itself is served by it directly.
	coldata.Batch

	// projection maps each output column position to the index of the
	// corresponding column in the underlying Batch.
	projection []uint32
	// colVecs is a lazily populated slice of coldata.Vecs to support returning
	// these in ColVecs().
	colVecs []coldata.Vec
}
    49  
    50  func newProjectionBatch(projection []uint32) *projectingBatch {
    51  	p := &projectingBatch{
    52  		projection: make([]uint32, len(projection)),
    53  	}
    54  	// We make a copy of projection to be safe.
    55  	copy(p.projection, projection)
    56  	return p
    57  }
    58  
    59  func (b *projectingBatch) ColVec(i int) coldata.Vec {
    60  	return b.Batch.ColVec(int(b.projection[i]))
    61  }
    62  
    63  func (b *projectingBatch) ColVecs() []coldata.Vec {
    64  	if b.Batch == coldata.ZeroBatch {
    65  		return nil
    66  	}
    67  	if b.colVecs == nil || len(b.colVecs) != len(b.projection) {
    68  		b.colVecs = make([]coldata.Vec, len(b.projection))
    69  	}
    70  	for i := range b.colVecs {
    71  		b.colVecs[i] = b.Batch.ColVec(int(b.projection[i]))
    72  	}
    73  	return b.colVecs
    74  }
    75  
    76  func (b *projectingBatch) Width() int {
    77  	return len(b.projection)
    78  }
    79  
    80  func (b *projectingBatch) AppendCol(col coldata.Vec) {
    81  	b.Batch.AppendCol(col)
    82  	b.projection = append(b.projection, uint32(b.Batch.Width())-1)
    83  }
    84  
    85  func (b *projectingBatch) ReplaceCol(col coldata.Vec, idx int) {
    86  	b.Batch.ReplaceCol(col, int(b.projection[idx]))
    87  }
    88  
    89  // NewSimpleProjectOp returns a new simpleProjectOp that applies a simple
    90  // projection on the columns in its input batch, returning a new batch with
    91  // only the columns in the projection slice, in order. In a degenerate case
    92  // when input already outputs batches that satisfy the projection, a
    93  // simpleProjectOp is not planned and input is returned.
    94  func NewSimpleProjectOp(
    95  	input colexecbase.Operator, numInputCols int, projection []uint32,
    96  ) colexecbase.Operator {
    97  	if numInputCols == len(projection) {
    98  		projectionIsRedundant := true
    99  		for i := range projection {
   100  			if projection[i] != uint32(i) {
   101  				projectionIsRedundant = false
   102  			}
   103  		}
   104  		if projectionIsRedundant {
   105  			return input
   106  		}
   107  	}
   108  	s := &simpleProjectOp{
   109  		OneInputNode:               NewOneInputNode(input),
   110  		projection:                 make([]uint32, len(projection)),
   111  		batches:                    make(map[coldata.Batch]*projectingBatch),
   112  		numBatchesLoggingThreshold: 128,
   113  	}
   114  	// We make a copy of projection to be safe.
   115  	copy(s.projection, projection)
   116  	return s
   117  }
   118  
   119  func (d *simpleProjectOp) Init() {
   120  	d.input.Init()
   121  }
   122  
   123  func (d *simpleProjectOp) Next(ctx context.Context) coldata.Batch {
   124  	batch := d.input.Next(ctx)
   125  	projBatch, found := d.batches[batch]
   126  	if !found {
   127  		projBatch = newProjectionBatch(d.projection)
   128  		d.batches[batch] = projBatch
   129  		if len(d.batches) == d.numBatchesLoggingThreshold {
   130  			if log.V(1) {
   131  				log.Infof(ctx, "simpleProjectOp: size of 'batches' map = %d", len(d.batches))
   132  			}
   133  			d.numBatchesLoggingThreshold = d.numBatchesLoggingThreshold * 2
   134  		}
   135  	}
   136  	projBatch.Batch = batch
   137  	return projBatch
   138  }
   139  
   140  // Close closes the simpleProjectOp's input.
   141  // TODO(asubiotto): Remove this method. It only exists so that we can call Close
   142  //  from some runTests subtests when not draining the input fully. The test
   143  //  should pass in the testing.T object used so that the caller can decide to
   144  //  explicitly close the input after checking the test.
   145  func (d *simpleProjectOp) IdempotentClose(ctx context.Context) error {
   146  	if !d.close() {
   147  		return nil
   148  	}
   149  	if c, ok := d.input.(IdempotentCloser); ok {
   150  		return c.IdempotentClose(ctx)
   151  	}
   152  	return nil
   153  }