github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/operator.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    23  )
    24  
    25  // OperatorInitStatus indicates whether Init method has already been called on
    26  // an Operator.
    27  type OperatorInitStatus int
    28  
    29  const (
    30  	// OperatorNotInitialized indicates that Init has not been called yet.
    31  	OperatorNotInitialized OperatorInitStatus = iota
    32  	// OperatorInitialized indicates that Init has already been called.
    33  	OperatorInitialized
    34  )
    35  
    36  // NonExplainable is a marker interface which identifies an Operator that
    37  // should be omitted from the output of EXPLAIN (VEC). Note that VERBOSE
    38  // explain option will override the omitting behavior.
    39  type NonExplainable interface {
    40  	// nonExplainableMarker is just a marker method. It should never be called.
    41  	nonExplainableMarker()
    42  }
    43  
    44  // NewOneInputNode returns an execinfra.OpNode with a single Operator input.
    45  func NewOneInputNode(input colexecbase.Operator) OneInputNode {
    46  	return OneInputNode{input: input}
    47  }
    48  
    49  // OneInputNode is an execinfra.OpNode with a single Operator input.
    50  type OneInputNode struct {
    51  	input colexecbase.Operator
    52  }
    53  
    54  // ChildCount implements the execinfra.OpNode interface.
    55  func (OneInputNode) ChildCount(verbose bool) int {
    56  	return 1
    57  }
    58  
    59  // Child implements the execinfra.OpNode interface.
    60  func (n OneInputNode) Child(nth int, verbose bool) execinfra.OpNode {
    61  	if nth == 0 {
    62  		return n.input
    63  	}
    64  	colexecerror.InternalError(fmt.Sprintf("invalid index %d", nth))
    65  	// This code is unreachable, but the compiler cannot infer that.
    66  	return nil
    67  }
    68  
    69  // Input returns the single input of this OneInputNode as an Operator.
    70  func (n OneInputNode) Input() colexecbase.Operator {
    71  	return n.input
    72  }
    73  
    74  // newTwoInputNode returns an execinfra.OpNode with two Operator inputs.
    75  func newTwoInputNode(inputOne, inputTwo colexecbase.Operator) twoInputNode {
    76  	return twoInputNode{inputOne: inputOne, inputTwo: inputTwo}
    77  }
    78  
    79  type twoInputNode struct {
    80  	inputOne colexecbase.Operator
    81  	inputTwo colexecbase.Operator
    82  }
    83  
    84  func (twoInputNode) ChildCount(verbose bool) int {
    85  	return 2
    86  }
    87  
    88  func (n *twoInputNode) Child(nth int, verbose bool) execinfra.OpNode {
    89  	switch nth {
    90  	case 0:
    91  		return n.inputOne
    92  	case 1:
    93  		return n.inputTwo
    94  	}
    95  	colexecerror.InternalError(fmt.Sprintf("invalid idx %d", nth))
    96  	// This code is unreachable, but the compiler cannot infer that.
    97  	return nil
    98  }
    99  
   100  // TODO(yuzefovich): audit all Operators to make sure that all internal memory
   101  // is accounted for.
   102  
   103  // InternalMemoryOperator is an interface that operators which use internal
   104  // memory need to implement. "Internal memory" is defined as memory that is
   105  // "private" to the operator and is not exposed to the outside; notably, it
   106  // does *not* include any coldata.Batch'es and coldata.Vec's.
   107  type InternalMemoryOperator interface {
   108  	colexecbase.Operator
   109  	// InternalMemoryUsage reports the internal memory usage (in bytes) of an
   110  	// operator.
   111  	InternalMemoryUsage() int
   112  }
   113  
   114  // resetter is an interface that operators can implement if they can be reset
   115  // either for reusing (to keep the already allocated memory) or during tests.
   116  type resetter interface {
   117  	reset(ctx context.Context)
   118  }
   119  
   120  // resettableOperator is an Operator that can be reset.
   121  type resettableOperator interface {
   122  	colexecbase.Operator
   123  	resetter
   124  }
   125  
   126  // IdempotentCloser is an object that releases resource on the first call to
   127  // IdempotentClose but does nothing for any subsequent call.
   128  type IdempotentCloser interface {
   129  	IdempotentClose(ctx context.Context) error
   130  }
   131  
   132  // closerHelper is a simple helper that helps Operators implement
   133  // IdempotentCloser. If close returns true, resources may be released, if it
   134  // returns false, close has already been called.
   135  // use.
   136  type closerHelper struct {
   137  	closed bool
   138  }
   139  
   140  // close marks the closerHelper as closed. If true is returned, this is the
   141  // first call to close.
   142  func (c *closerHelper) close() bool {
   143  	if c.closed {
   144  		return false
   145  	}
   146  	c.closed = true
   147  	return true
   148  }
   149  
   150  type closableOperator interface {
   151  	colexecbase.Operator
   152  	IdempotentCloser
   153  }
   154  
   155  type noopOperator struct {
   156  	OneInputNode
   157  	NonExplainable
   158  }
   159  
   160  var _ colexecbase.Operator = &noopOperator{}
   161  
   162  // NewNoop returns a new noop Operator.
   163  func NewNoop(input colexecbase.Operator) colexecbase.Operator {
   164  	return &noopOperator{OneInputNode: NewOneInputNode(input)}
   165  }
   166  
   167  func (n *noopOperator) Init() {
   168  	n.input.Init()
   169  }
   170  
   171  func (n *noopOperator) Next(ctx context.Context) coldata.Batch {
   172  	return n.input.Next(ctx)
   173  }
   174  
   175  func (n *noopOperator) reset(ctx context.Context) {
   176  	if r, ok := n.input.(resetter); ok {
   177  		r.reset(ctx)
   178  	}
   179  }
   180  
   181  type zeroOperator struct {
   182  	OneInputNode
   183  	NonExplainable
   184  }
   185  
   186  var _ colexecbase.Operator = &zeroOperator{}
   187  
   188  // NewZeroOp creates a new operator which just returns an empty batch.
   189  func NewZeroOp(input colexecbase.Operator) colexecbase.Operator {
   190  	return &zeroOperator{OneInputNode: NewOneInputNode(input)}
   191  }
   192  
   193  func (s *zeroOperator) Init() {
   194  	s.input.Init()
   195  }
   196  
   197  func (s *zeroOperator) Next(ctx context.Context) coldata.Batch {
   198  	return coldata.ZeroBatch
   199  }
   200  
   201  type singleTupleNoInputOperator struct {
   202  	colexecbase.ZeroInputNode
   203  	NonExplainable
   204  	batch  coldata.Batch
   205  	nexted bool
   206  }
   207  
   208  var _ colexecbase.Operator = &singleTupleNoInputOperator{}
   209  
   210  // NewSingleTupleNoInputOp creates a new Operator which returns a batch of
   211  // length 1 with no actual columns on the first call to Next() and zero-length
   212  // batches on all consecutive calls.
   213  func NewSingleTupleNoInputOp(allocator *colmem.Allocator) colexecbase.Operator {
   214  	return &singleTupleNoInputOperator{
   215  		batch: allocator.NewMemBatchWithSize(nil /* types */, 1 /* size */),
   216  	}
   217  }
   218  
   219  func (s *singleTupleNoInputOperator) Init() {
   220  }
   221  
   222  func (s *singleTupleNoInputOperator) Next(ctx context.Context) coldata.Batch {
   223  	s.batch.ResetInternalBatch()
   224  	if s.nexted {
   225  		return coldata.ZeroBatch
   226  	}
   227  	s.nexted = true
   228  	s.batch.SetLength(1)
   229  	return s.batch
   230  }
   231  
   232  // feedOperator is used to feed an Operator chain with input by manually
   233  // setting the next batch.
   234  type feedOperator struct {
   235  	colexecbase.ZeroInputNode
   236  	NonExplainable
   237  	batch coldata.Batch
   238  }
   239  
   240  func (feedOperator) Init() {}
   241  
   242  func (o *feedOperator) Next(context.Context) coldata.Batch {
   243  	return o.batch
   244  }
   245  
   246  var _ colexecbase.Operator = &feedOperator{}
   247  
   248  // vectorTypeEnforcer is a utility Operator that on every call to Next
   249  // enforces that non-zero length batch from the input has a vector of the
   250  // desired type in the desired position. If the width of the batch is less than
   251  // the desired position, a new vector will be appended; if the batch has a
   252  // well-typed vector of an undesired type in the desired position, an error
   253  // will occur.
   254  //
   255  // This Operator is designed to be planned as a wrapper on the input to a
   256  // "projecting" Operator (such Operator that has a single column as its output
   257  // and does not touch other columns by simply passing them along).
   258  //
   259  // The intended diagram is as follows:
   260  //
   261  //       original input                (with schema [t1, ..., tN])
   262  //       --------------
   263  //             |
   264  //             ↓
   265  //     vectorTypeEnforcer              (will enforce that tN+1 = outputType)
   266  //     ------------------
   267  //             |
   268  //             ↓
   269  //   "projecting" operator             (projects its output of type outputType
   270  //   ---------------------              in column at position of N+1)
   271  //
   272  type vectorTypeEnforcer struct {
   273  	OneInputNode
   274  	NonExplainable
   275  
   276  	allocator *colmem.Allocator
   277  	typ       *types.T
   278  	idx       int
   279  }
   280  
   281  var _ colexecbase.Operator = &vectorTypeEnforcer{}
   282  
   283  func newVectorTypeEnforcer(
   284  	allocator *colmem.Allocator, input colexecbase.Operator, typ *types.T, idx int,
   285  ) colexecbase.Operator {
   286  	return &vectorTypeEnforcer{
   287  		OneInputNode: NewOneInputNode(input),
   288  		allocator:    allocator,
   289  		typ:          typ,
   290  		idx:          idx,
   291  	}
   292  }
   293  
   294  func (e *vectorTypeEnforcer) Init() {
   295  	e.input.Init()
   296  }
   297  
   298  func (e *vectorTypeEnforcer) Next(ctx context.Context) coldata.Batch {
   299  	b := e.input.Next(ctx)
   300  	if b.Length() == 0 {
   301  		return b
   302  	}
   303  	e.allocator.MaybeAppendColumn(b, e.typ, e.idx)
   304  	return b
   305  }
   306  
   307  // batchSchemaSubsetEnforcer is similar to vectorTypeEnforcer in its purpose,
   308  // but it enforces that the subset of the columns of the non-zero length batch
   309  // satisfies the desired schema. It needs to wrap the input to a "projecting"
   310  // operator that internally uses other "projecting" operators (for example,
   311  // caseOp and logical projection operators). This operator supports type
   312  // schemas with unsupported types in which case in the corresponding
   313  // position an "unknown" vector can be appended.
   314  //
   315  // The word "subset" is actually more like a "range", but we chose the former
   316  // since the latter is overloaded.
   317  //
   318  // NOTE: the type schema passed into batchSchemaSubsetEnforcer *must* include
   319  // the output type of the Operator that the enforcer will be the input to.
   320  type batchSchemaSubsetEnforcer struct {
   321  	OneInputNode
   322  	NonExplainable
   323  
   324  	allocator                    *colmem.Allocator
   325  	typs                         []*types.T
   326  	subsetStartIdx, subsetEndIdx int
   327  }
   328  
   329  var _ colexecbase.Operator = &batchSchemaSubsetEnforcer{}
   330  
   331  // newBatchSchemaSubsetEnforcer creates a new batchSchemaSubsetEnforcer.
   332  // - subsetStartIdx and subsetEndIdx define the boundaries of the range of
   333  // columns that the projecting operator and its internal projecting operators
   334  // own.
   335  func newBatchSchemaSubsetEnforcer(
   336  	allocator *colmem.Allocator,
   337  	input colexecbase.Operator,
   338  	typs []*types.T,
   339  	subsetStartIdx, subsetEndIdx int,
   340  ) *batchSchemaSubsetEnforcer {
   341  	return &batchSchemaSubsetEnforcer{
   342  		OneInputNode:   NewOneInputNode(input),
   343  		allocator:      allocator,
   344  		typs:           typs,
   345  		subsetStartIdx: subsetStartIdx,
   346  		subsetEndIdx:   subsetEndIdx,
   347  	}
   348  }
   349  
   350  func (e *batchSchemaSubsetEnforcer) Init() {
   351  	e.input.Init()
   352  	if e.subsetStartIdx >= e.subsetEndIdx {
   353  		colexecerror.InternalError("unexpectedly subsetStartIdx is not less than subsetEndIdx")
   354  	}
   355  }
   356  
   357  func (e *batchSchemaSubsetEnforcer) Next(ctx context.Context) coldata.Batch {
   358  	b := e.input.Next(ctx)
   359  	if b.Length() == 0 {
   360  		return b
   361  	}
   362  	for i := e.subsetStartIdx; i < e.subsetEndIdx; i++ {
   363  		e.allocator.MaybeAppendColumn(b, e.typs[i], i)
   364  	}
   365  	return b
   366  }