github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/execplan.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package colexec

import (
	"context"
	"fmt"
	"math"
	"reflect"

	"github.com/cockroachdb/cockroach/pkg/col/coldata"
	"github.com/cockroachdb/cockroach/pkg/col/coldataext"
	"github.com/cockroachdb/cockroach/pkg/col/typeconv"
	"github.com/cockroachdb/cockroach/pkg/sql/colcontainer"
	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/errors"
	"github.com/marusama/semaphore"
)

func checkNumIn(inputs []colexecbase.Operator, numIn int) error {
	if len(inputs) != numIn {
		return errors.Errorf("expected %d input(s), got %d", numIn, len(inputs))
	}
	return nil
}

// wrapRowSources, given input Operators, integrates toWrap into a columnar
// execution flow and returns toWrap's output as an Operator.
func wrapRowSources(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	inputs []colexecbase.Operator,
	inputTypes [][]*types.T,
	acc *mon.BoundAccount,
	processorID int32,
	newToWrap func([]execinfra.RowSource) (execinfra.RowSource, error),
	factory coldata.ColumnFactory,
) (*Columnarizer, error) {
	var toWrapInputs []execinfra.RowSource
	for i, input := range inputs {
		// Optimization: if the input is a Columnarizer, its input is necessarily
		// an execinfra.RowSource, so remove the unnecessary conversion.
		if c, ok := input.(*Columnarizer); ok {
			// TODO(asubiotto): We might need to do some extra work to remove
			// references to this operator (e.g. streamIDToOp).
			toWrapInputs = append(toWrapInputs, c.input)
		} else {
			toWrapInput, err := NewMaterializer(
				flowCtx,
				processorID,
				input,
				inputTypes[i],
				nil, /* output */
				nil, /* metadataSourcesQueue */
				nil, /* toClose */
				nil, /* outputStatsToTrace */
				nil, /* cancelFlow */
			)
			if err != nil {
				return nil, err
			}
			toWrapInputs = append(toWrapInputs, toWrapInput)
		}
	}

	toWrap, err := newToWrap(toWrapInputs)
	if err != nil {
		return nil, err
	}

	return NewColumnarizer(ctx, colmem.NewAllocator(ctx, acc, factory), flowCtx, processorID, toWrap)
}

// NewColOperatorArgs is a helper struct that encompasses all of the input
// arguments to the NewColOperator call.
type NewColOperatorArgs struct {
	Spec                 *execinfrapb.ProcessorSpec
	Inputs               []colexecbase.Operator
	StreamingMemAccount  *mon.BoundAccount
	ProcessorConstructor execinfra.ProcessorConstructor
	DiskQueueCfg         colcontainer.DiskQueueCfg
	FDSemaphore          semaphore.Semaphore
	TestingKnobs         struct {
		// UseStreamingMemAccountForBuffering specifies whether to use
		// StreamingMemAccount when creating buffering operators and should only be
		// set to 'true' in tests. The idea behind this flag is to reduce the
		// number of memory accounts and monitors we need to close, so we plumbed
		// it into the planning code so that it doesn't create extra memory
		// monitoring infrastructure (and so that we could use testMemAccount
		// defined in main_test.go).
		UseStreamingMemAccountForBuffering bool
		// SpillingCallbackFn will be called when the spilling from an in-memory
		// to a disk-backed operator occurs. It should only be set in tests.
		SpillingCallbackFn func()
		// DiskSpillingDisabled specifies whether only in-memory operators should
		// be created.
		DiskSpillingDisabled bool
		// NumForcedRepartitions specifies a number of "repartitions" that a
		// disk-backed operator should be forced to perform. "Repartition" can
		// mean different things depending on the operator (for example, for the
		// hash joiner it is dividing the original partition into multiple new
		// partitions; for the sorter it is merging already created partitions
		// into a new one before proceeding to the next partition from the input).
		NumForcedRepartitions int
		// DelegateFDAcquisitions should be observed by users of a
		// PartitionedDiskQueue. During normal operation, users acquire the
		// maximum number of file descriptors they will use from FDSemaphore up
		// front. Setting this testing knob to true disables that behavior and
		// lets the PartitionedDiskQueue interact with the semaphore as partitions
		// are opened/closed, which ensures that the number of open files never
		// exceeds what is expected.
		DelegateFDAcquisitions bool
	}
}

// NewColOperatorResult is a helper struct that encompasses all of the return
// values of the NewColOperator call.
type NewColOperatorResult struct {
	Op               colexecbase.Operator
	ColumnTypes      []*types.T
	InternalMemUsage int
	MetadataSources  []execinfrapb.MetadataSource
	// ToClose is a slice of components that need to be Closed. Close should be
	// idempotent.
	ToClose     []IdempotentCloser
	IsStreaming bool
	OpMonitors  []*mon.BytesMonitor
	OpAccounts  []*mon.BoundAccount
}

// resetToState resets r to the state specified in arg. arg may be a shallow
// copy made at a given point in time.
func (r *NewColOperatorResult) resetToState(ctx context.Context, arg NewColOperatorResult) {
	// MetadataSources are left untouched since there is no need to do any
	// cleaning there.

	// Close BoundAccounts that are not present in arg.OpAccounts.
	accs := make(map[*mon.BoundAccount]struct{})
	for _, a := range arg.OpAccounts {
		accs[a] = struct{}{}
	}
	for _, a := range r.OpAccounts {
		if _, ok := accs[a]; !ok {
			a.Close(ctx)
		}
	}
	// Stop BytesMonitors that are not present in arg.OpMonitors.
	mons := make(map[*mon.BytesMonitor]struct{})
	for _, m := range arg.OpMonitors {
		mons[m] = struct{}{}
	}

	for _, m := range r.OpMonitors {
		if _, ok := mons[m]; !ok {
			m.Stop(ctx)
		}
	}

	// Shallow copy over the rest.
	*r = arg
}
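
// A minimal sketch of the backtracking pattern that resetToState enables
// (hypothetical caller, illustrative only; planSomething is not a real
// function in this package): take a shallow snapshot of the result, attempt
// planning, and on failure close only the accounts and monitors that were
// opened after the snapshot was taken.
//
//	snapshot := result // shallow copy
//	if err := planSomething(&result); err != nil {
//		result.resetToState(ctx, snapshot)
//	}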

const noFilterIdx = -1

// isSupported checks whether we have a columnar operator equivalent to a
// processor described by spec. Note that it doesn't perform any other checks
// (like validity of the number of inputs).
func isSupported(
	allocator *colmem.Allocator, mode sessiondata.VectorizeExecMode, spec *execinfrapb.ProcessorSpec,
) (bool, error) {
	core := spec.Core
	isFullVectorization := mode == sessiondata.VectorizeOn ||
		mode == sessiondata.VectorizeExperimentalAlways

	switch {
	case core.Noop != nil:
		return true, nil

	case core.TableReader != nil:
		if core.TableReader.IsCheck {
			return false, errors.Newf("scrub table reader is unsupported in vectorized")
		}
		return true, nil

	case core.Aggregator != nil:
		aggSpec := core.Aggregator
		for _, agg := range aggSpec.Aggregations {
			if agg.Distinct {
				return false, errors.Newf("distinct aggregation not supported")
			}
			if agg.FilterColIdx != nil {
				return false, errors.Newf("filtering aggregation not supported")
			}
			if len(agg.Arguments) > 0 {
				return false, errors.Newf("aggregates with arguments not supported")
			}
			inputTypes := make([]*types.T, len(agg.ColIdx))
			for pos, colIdx := range agg.ColIdx {
				inputTypes[pos] = spec.Input[0].ColumnTypes[colIdx]
			}
			if supported, err := isAggregateSupported(allocator, agg.Func, inputTypes); !supported {
				return false, err
			}
		}
		return true, nil

	case core.Distinct != nil:
		if core.Distinct.NullsAreDistinct {
			return false, errors.Newf("distinct with unique nulls not supported")
		}
		if core.Distinct.ErrorOnDup != "" {
			return false, errors.Newf("distinct with error on duplicates not supported")
		}
		if !isFullVectorization {
			if len(core.Distinct.OrderedColumns) < len(core.Distinct.DistinctColumns) {
				return false, errors.Newf("unordered distinct can only run in vectorize 'on' mode")
			}
		}
		return true, nil

	case core.Ordinality != nil:
		return true, nil

	case core.HashJoiner != nil:
		if !core.HashJoiner.OnExpr.Empty() && core.HashJoiner.Type != sqlbase.InnerJoin {
			return false, errors.Newf("can't plan vectorized non-inner hash joins with ON expressions")
		}
		leftInput, rightInput := spec.Input[0], spec.Input[1]
		if len(leftInput.ColumnTypes) == 0 || len(rightInput.ColumnTypes) == 0 {
			// We have a cross join of two inputs, and at least one of them has
			// a zero-length schema. However, the hash join operators (both
			// external and in-memory) have a built-in assumption of non-empty
			// inputs, so we will fall back to row execution in such cases.
			// TODO(yuzefovich): implement specialized cross join operator.
			return false, errors.Newf("can't plan vectorized hash joins with an empty input schema")
		}
		return true, nil

	case core.MergeJoiner != nil:
		if !core.MergeJoiner.OnExpr.Empty() &&
			core.MergeJoiner.Type != sqlbase.InnerJoin {
			return false, errors.Errorf("can't plan non-inner merge join with ON expressions")
		}
		return true, nil

	case core.Sorter != nil:
		return true, nil

	case core.Windower != nil:
		for _, wf := range core.Windower.WindowFns {
			if wf.Frame != nil {
				frame, err := wf.Frame.ConvertToAST()
				if err != nil {
					return false, err
				}
				if !frame.IsDefaultFrame() {
					return false, errors.Newf("window functions with non-default window frames are not supported")
				}
			}
			if wf.FilterColIdx != noFilterIdx {
				return false, errors.Newf("window functions with FILTER clause are not supported")
			}
			if wf.Func.AggregateFunc != nil {
				return false, errors.Newf("aggregate functions used as window functions are not supported")
			}

			if _, supported := SupportedWindowFns[*wf.Func.WindowFunc]; !supported {
				return false, errors.Newf("window function %s is not supported", wf.String())
			}
			if !isFullVectorization {
				switch *wf.Func.WindowFunc {
				case execinfrapb.WindowerSpec_PERCENT_RANK, execinfrapb.WindowerSpec_CUME_DIST:
					return false, errors.Newf("window function %s can only run in vectorize 'on' mode", wf.String())
				}
			}
		}
		return true, nil

	default:
		return false, errors.Newf("unsupported processor core %q", core)
	}
}

// createDiskBackedSort creates a new disk-backed operator that sorts the input
// according to ordering.
// - matchLen specifies the length of the prefix of ordering columns the input
// is already ordered on.
// - maxNumberPartitions (when non-zero) overrides the semi-dynamically
// computed maximum number of partitions that the external sorter will have
// at once.
// - processorID is the ProcessorID of the processor core that requested
// creation of this operator. It is used only to distinguish memory monitors.
// - post describes the post-processing spec of the processor. It will be used
// to determine whether a top K sort can be planned. If you want the general
// sort operator, pass in an empty struct.
func (r *NewColOperatorResult) createDiskBackedSort(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	args NewColOperatorArgs,
	input colexecbase.Operator,
	inputTypes []*types.T,
	ordering execinfrapb.Ordering,
	matchLen uint32,
	maxNumberPartitions int,
	processorID int32,
	post *execinfrapb.PostProcessSpec,
	memMonitorNamePrefix string,
	factory coldata.ColumnFactory,
) (colexecbase.Operator, error) {
	streamingMemAccount := args.StreamingMemAccount
	useStreamingMemAccountForBuffering := args.TestingKnobs.UseStreamingMemAccountForBuffering
	var (
		sorterMemMonitorName string
		inMemorySorter       colexecbase.Operator
		err                  error
	)
	if len(ordering.Columns) == int(matchLen) {
		// The input is already fully ordered, so there is nothing to sort.
		return input, nil
	}
	if matchLen > 0 {
		// The input is already partially ordered. Use a chunks sorter to avoid
		// loading all the rows into memory.
		sorterMemMonitorName = fmt.Sprintf("%ssort-chunks-%d", memMonitorNamePrefix, processorID)
		var sortChunksMemAccount *mon.BoundAccount
		if useStreamingMemAccountForBuffering {
			sortChunksMemAccount = streamingMemAccount
		} else {
			sortChunksMemAccount = r.createMemAccountForSpillStrategy(
				ctx, flowCtx, sorterMemMonitorName,
			)
		}
		inMemorySorter, err = NewSortChunks(
			colmem.NewAllocator(ctx, sortChunksMemAccount, factory), input, inputTypes,
			ordering.Columns, int(matchLen),
		)
	} else if post.Limit != 0 && post.Filter.Empty() && int(post.Limit+post.Offset) > 0 {
		// There is a limit specified with no post-process filter, so we know
		// exactly how many rows the sorter should output. The last part of the
		// condition is making sure there is no overflow when converting from
		// the sum of two uint64s to int.
		//
		// Choose a top K sorter, which uses a heap to avoid storing more rows
		// than necessary.
		sorterMemMonitorName = fmt.Sprintf("%stopk-sort-%d", memMonitorNamePrefix, processorID)
		var topKSorterMemAccount *mon.BoundAccount
		if useStreamingMemAccountForBuffering {
			topKSorterMemAccount = streamingMemAccount
		} else {
			topKSorterMemAccount = r.createMemAccountForSpillStrategy(
				ctx, flowCtx, sorterMemMonitorName,
			)
		}
		k := int(post.Limit + post.Offset)
		inMemorySorter = NewTopKSorter(
			colmem.NewAllocator(ctx, topKSorterMemAccount, factory), input, inputTypes,
			ordering.Columns, k,
		)
	} else {
		// No optimizations possible. Default to the standard sort operator.
		sorterMemMonitorName = fmt.Sprintf("%ssort-all-%d", memMonitorNamePrefix, processorID)
		var sorterMemAccount *mon.BoundAccount
		if useStreamingMemAccountForBuffering {
			sorterMemAccount = streamingMemAccount
		} else {
			sorterMemAccount = r.createMemAccountForSpillStrategy(
				ctx, flowCtx, sorterMemMonitorName,
			)
		}
		inMemorySorter, err = NewSorter(
			colmem.NewAllocator(ctx, sorterMemAccount, factory), input, inputTypes, ordering.Columns,
		)
	}
	if err != nil {
		return nil, err
	}
	if inMemorySorter == nil {
		return nil, errors.AssertionFailedf("unexpectedly inMemorySorter is nil")
	}
	// NOTE: when spilling to disk, we're using the same general external
	// sorter regardless of which sorter variant we have instantiated (i.e.
	// we don't take advantage of the limits and of partial ordering). We
	// could improve this.
	return newOneInputDiskSpiller(
		input, inMemorySorter.(bufferingInMemoryOperator),
		sorterMemMonitorName,
		func(input colexecbase.Operator) colexecbase.Operator {
			monitorNamePrefix := fmt.Sprintf("%sexternal-sorter", memMonitorNamePrefix)
			// We are using an unlimited memory monitor here because external
			// sort itself is responsible for making sure that we stay within
			// the memory limit.
			unlimitedAllocator := colmem.NewAllocator(
				ctx, r.createBufferingUnlimitedMemAccount(
					ctx, flowCtx, monitorNamePrefix,
				), factory)
			standaloneMemAccount := r.createStandaloneMemAccount(
				ctx, flowCtx, monitorNamePrefix,
			)
			diskAccount := r.createDiskAccount(ctx, flowCtx, monitorNamePrefix)
			// Make a copy of the DiskQueueCfg and set defaults for the sorter.
			// The cache mode is chosen to reuse the cache to have a smaller
			// cache per partition without affecting performance.
			diskQueueCfg := args.DiskQueueCfg
			diskQueueCfg.CacheMode = colcontainer.DiskQueueCacheModeReuseCache
			diskQueueCfg.SetDefaultBufferSizeBytesForCacheMode()
			if args.TestingKnobs.NumForcedRepartitions != 0 {
				maxNumberPartitions = args.TestingKnobs.NumForcedRepartitions
			}
			es := newExternalSorter(
				ctx,
				unlimitedAllocator,
				standaloneMemAccount,
				input, inputTypes, ordering,
				execinfra.GetWorkMemLimit(flowCtx.Cfg),
				maxNumberPartitions,
				args.TestingKnobs.DelegateFDAcquisitions,
				diskQueueCfg,
				args.FDSemaphore,
				diskAccount,
			)
			r.ToClose = append(r.ToClose, es.(IdempotentCloser))
			return es
		},
		args.TestingKnobs.SpillingCallbackFn,
	), nil
}
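
// The top-K branch in createDiskBackedSort guards the uint64-to-int
// conversion with int(post.Limit+post.Offset) > 0. A minimal standalone
// sketch of why that guard matters (hypothetical values, illustrative only;
// assumes a 64-bit platform where int is 64 bits wide):
func exampleTopKOverflowGuard() {
	limit, offset := uint64(1)<<63, uint64(0)
	// The sum does not fit in an int: the conversion yields math.MinInt64,
	// so the guard fails and the planner falls back to the general sorter.
	if k := int(limit + offset); k > 0 {
		fmt.Println("top-K sort with k =", k)
	} else {
		fmt.Println("overflow detected; use the general sort operator")
	}
}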

// createAndWrapRowSource takes a processor spec, creating the row source and
// wrapping it using wrapRowSources. Note that the post process spec is included
// in the processor creation, so make sure to clear it if it will be inspected
// again. NewColOperatorResult is updated with the new OutputTypes and the
// resulting Columnarizer if there is no error. The result is also annotated as
// streaming because the resulting operator is not a buffering operator (even if
// it is a buffering processor). This is not a problem for memory accounting
// because each processor does that on its own, so the used memory will be
// accounted for.
func (r *NewColOperatorResult) createAndWrapRowSource(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	inputs []colexecbase.Operator,
	inputTypes [][]*types.T,
	streamingMemAccount *mon.BoundAccount,
	spec *execinfrapb.ProcessorSpec,
	processorConstructor execinfra.ProcessorConstructor,
	factory coldata.ColumnFactory,
) error {
	if flowCtx.EvalCtx.SessionData.VectorizeMode == sessiondata.Vectorize201Auto &&
		spec.Core.JoinReader == nil {
		return errors.New("rowexec processor wrapping for non-JoinReader core unsupported in vectorize=201auto mode")
	}
	c, err := wrapRowSources(
		ctx,
		flowCtx,
		inputs,
		inputTypes,
		streamingMemAccount,
		spec.ProcessorID,
		func(inputs []execinfra.RowSource) (execinfra.RowSource, error) {
			// We provide a slice with a single nil as the 'outputs' parameter
			// because all processors expect a single output. Passing nil is ok
			// here because when wrapping the processor, the materializer will be
			// its output, and it will be set up in wrapRowSources.
			proc, err := processorConstructor(
				ctx, flowCtx, spec.ProcessorID, &spec.Core, &spec.Post, inputs,
				[]execinfra.RowReceiver{nil}, /* outputs */
				nil, /* localProcessors */
			)
			if err != nil {
				return nil, err
			}
			var (
				rs execinfra.RowSource
				ok bool
			)
			if rs, ok = proc.(execinfra.RowSource); !ok {
				return nil, errors.Newf(
					"processor %s is not an execinfra.RowSource", spec.Core.String(),
				)
			}
			r.ColumnTypes = rs.OutputTypes()
			return rs, nil
		},
		factory,
	)
	if err != nil {
		return err
	}
	// We say that the wrapped processor is "streaming" because it is not a
	// buffering operator (even if it is a buffering processor). This is not a
	// problem for memory accounting because each processor does that on its
	// own, so the used memory will be accounted for.
	r.Op, r.IsStreaming = c, true
	r.MetadataSources = append(r.MetadataSources, c)
	return nil
}

// NOTE: throughout this file we do not append an output type of a projecting
// operator to the passed-in type schema - we, instead, always allocate a new
// type slice, copy over the old schema, and set the output column of the
// projecting operator in the next slot. We attempt to enforce this with a
// linter rule; such behavior prevents the type schema corruption scenario
// described below.
//
// Without explicit new allocations, it is possible that planSelectionOperators
// (and other planning functions) reuse the same array for filterColumnTypes as
// result.ColumnTypes is using because there was enough capacity to do so.
// As an example, consider the following scenario in the context of the
// planFilterExpr method:
// 1. r.ColumnTypes={types.Bool} with len=1 and cap=4
// 2. planSelectionOperators adds another types.Int column, so
//    filterColumnTypes={types.Bool, types.Int} with len=2 and cap=4.
//    Crucially, it uses the exact same underlying array as r.ColumnTypes.
// 3. we project out the second column, so r.ColumnTypes={types.Bool}
// 4. later, we add another types.Float column, so
//    r.ColumnTypes={types.Bool, types.Float}, but there is enough
//    capacity in the array, so we simply overwrite the second slot
//    with the new type, which corrupts filterColumnTypes to become
//    {types.Bool, types.Float}, and we can get into a runtime type
//    mismatch situation.
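
// A minimal standalone sketch of the aliasing hazard described in the NOTE
// above (hypothetical type names, stdlib only): two slices that share an
// underlying array observe each other's writes once one of them grows back
// into the shared capacity.
func exampleTypeSchemaAliasing() {
	columnTypes := make([]string, 1, 4)
	columnTypes[0] = "Bool"
	// Appending within capacity reuses the same underlying array.
	filterColumnTypes := append(columnTypes, "Int")
	// "Project out" the second column, then append a different type.
	columnTypes = columnTypes[:1]
	columnTypes = append(columnTypes, "Float") // overwrites the shared slot
	fmt.Println(filterColumnTypes[1])          // prints "Float", not "Int"
}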

// NewColOperator creates a new columnar operator according to the given spec.
func NewColOperator(
	ctx context.Context, flowCtx *execinfra.FlowCtx, args NewColOperatorArgs,
) (result NewColOperatorResult, err error) {
	// Make sure that we clean up memory monitoring infrastructure in case of an
	// error or a panic.
	defer func() {
		returnedErr := err
		panicErr := recover()
		if returnedErr != nil || panicErr != nil {
			for _, acc := range result.OpAccounts {
				acc.Close(ctx)
			}
			result.OpAccounts = result.OpAccounts[:0]
			for _, mon := range result.OpMonitors {
				mon.Stop(ctx)
			}
			result.OpMonitors = result.OpMonitors[:0]
		}
		if panicErr != nil {
			colexecerror.InternalError(panicErr)
		}
	}()
	spec := args.Spec
	inputs := args.Inputs
	factory := coldataext.NewExtendedColumnFactory(flowCtx.NewEvalCtx())
	streamingMemAccount := args.StreamingMemAccount
	streamingAllocator := colmem.NewAllocator(ctx, streamingMemAccount, factory)
	useStreamingMemAccountForBuffering := args.TestingKnobs.UseStreamingMemAccountForBuffering
	processorConstructor := args.ProcessorConstructor

	log.VEventf(ctx, 2, "planning col operator for spec %q", spec)

	core := &spec.Core
	post := &spec.Post

	// By default, we safely assume that an operator is not streaming. Note that
	// projections, renders, filters, limits, and offsets, as well as all
	// internal operators (like stats collectors and cancel checkers), are
	// streaming, so in order to determine whether the resulting chain of
	// operators is streaming, it is sufficient to look only at the "core"
	// operator.
	result.IsStreaming = false

	// resultPreSpecPlanningStateShallowCopy is a shallow copy of the result
	// before any specs are planned. Used if there is a need to backtrack.
	resultPreSpecPlanningStateShallowCopy := result

	supported, err := isSupported(streamingAllocator, flowCtx.EvalCtx.SessionData.VectorizeMode, spec)
	if !supported {
		// We refuse to wrap the LocalPlanNode processor (which is a DistSQL
		// wrapper around a planNode) because it creates complications, and a
		// flow with such a processor probably will not benefit from the
		// vectorization.
		if core.LocalPlanNode != nil {
			return result, errors.Newf("core.LocalPlanNode is not supported")
		}
		// We also do not wrap MetadataTest{Sender,Receiver} because of the way
		// metadata is propagated through the vectorized flow - it is drained at
		// flow shutdown, which is not what these test processors expect.
		if core.MetadataTestSender != nil {
			return result, errors.Newf("core.MetadataTestSender is not supported")
		}
		if core.MetadataTestReceiver != nil {
			return result, errors.Newf("core.MetadataTestReceiver is not supported")
		}

		log.VEventf(ctx, 1, "planning a wrapped processor because %s", err.Error())

		inputTypes := make([][]*types.T, len(spec.Input))
		for inputIdx, input := range spec.Input {
			inputTypes[inputIdx] = make([]*types.T, len(input.ColumnTypes))
			copy(inputTypes[inputIdx], input.ColumnTypes)
		}

		err = result.createAndWrapRowSource(ctx, flowCtx, inputs, inputTypes,
			streamingMemAccount, spec, processorConstructor, factory)
		// The wrapped processors need to be passed the post-process specs,
		// since they inspect them to figure out information about needed
		// columns. This means that we'll let those processors do any renders
		// or filters, which isn't ideal. We could improve this.
		post = &execinfrapb.PostProcessSpec{}

	} else {
		switch {
		case core.Noop != nil:
			if err := checkNumIn(inputs, 1); err != nil {
				return result, err
			}
			result.Op, result.IsStreaming = NewNoop(inputs[0]), true
			result.ColumnTypes = make([]*types.T, len(spec.Input[0].ColumnTypes))
			copy(result.ColumnTypes, spec.Input[0].ColumnTypes)
		case core.TableReader != nil:
			if err := checkNumIn(inputs, 0); err != nil {
				return result, err
			}
			var scanOp *colBatchScan
			scanOp, err = newColBatchScan(streamingAllocator, flowCtx, core.TableReader, post)
			if err != nil {
				return result, err
			}
			result.Op, result.IsStreaming = scanOp, true
			result.MetadataSources = append(result.MetadataSources, scanOp)
			// colBatchScan is wrapped with a cancel checker below, so we need to
			// log its creation separately.
			log.VEventf(ctx, 1, "made op %T\n", result.Op)

			// We want to check for cancellation once per input batch, and wrapping
			// only colBatchScan with a CancelChecker allows us to do just that.
			// It's sufficient for most of the operators since they are extremely
			// fast. However, some of the long-running operators (for example, the
			// sorter) are still responsible for doing the cancellation check on
			// their own while performing long operations.
			result.Op = NewCancelChecker(result.Op)
			returnMutations := core.TableReader.Visibility == execinfra.ScanVisibilityPublicAndNotPublic
			result.ColumnTypes = core.TableReader.Table.ColumnTypesWithMutations(returnMutations)
		case core.Aggregator != nil:
			if err := checkNumIn(inputs, 1); err != nil {
				return result, err
			}
			aggSpec := core.Aggregator
			if len(aggSpec.Aggregations) == 0 {
				// We can get an aggregator when no aggregate functions are present
				// if a HAVING clause is present, for example, with a query as
				// follows: SELECT 1 FROM t HAVING true. In this case, we plan a
				// special operator that outputs a batch of length 1 without actual
				// columns once and then zero-length batches. The actual "data" will
				// be added by projections below.
				// TODO(solon): The distsql plan for this case includes a TableReader,
				// so we end up creating an orphaned colBatchScan. We should avoid
				// that. Ideally the optimizer would not plan a scan in this unusual
				// case.
				result.Op, result.IsStreaming, err = NewSingleTupleNoInputOp(streamingAllocator), true, nil
				// We make ColumnTypes non-nil so that sanity check doesn't panic.
				result.ColumnTypes = []*types.T{}
				break
			}
			if aggSpec.IsRowCount() {
				result.Op, result.IsStreaming, err = NewCountOp(streamingAllocator, inputs[0]), true, nil
				result.ColumnTypes = []*types.T{types.Int}
				break
			}

			var groupCols, orderedCols util.FastIntSet

			for _, col := range aggSpec.OrderedGroupCols {
				orderedCols.Add(int(col))
			}

			needHash := false
			for _, col := range aggSpec.GroupCols {
				if !orderedCols.Contains(int(col)) {
					needHash = true
				}
				groupCols.Add(int(col))
			}
			if !orderedCols.SubsetOf(groupCols) {
				return result, errors.AssertionFailedf("ordered cols must be a subset of grouping cols")
			}
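			// Illustration (hypothetical column sets): with GroupCols={0,1} and
			// OrderedGroupCols={0}, column 1 is not ordered at the input, so
			// needHash is true and the hash aggregator is planned below; with
			// OrderedGroupCols={0,1} the ordered aggregator suffices.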

			aggTyps := make([][]*types.T, len(aggSpec.Aggregations))
			aggCols := make([][]uint32, len(aggSpec.Aggregations))
			aggFns := make([]execinfrapb.AggregatorSpec_Func, len(aggSpec.Aggregations))
			result.ColumnTypes = make([]*types.T, len(aggSpec.Aggregations))
			for i, agg := range aggSpec.Aggregations {
				aggTyps[i] = make([]*types.T, len(agg.ColIdx))
				for j, colIdx := range agg.ColIdx {
					aggTyps[i][j] = spec.Input[0].ColumnTypes[colIdx]
				}
				aggCols[i] = agg.ColIdx
				aggFns[i] = agg.Func
				_, retType, err := execinfrapb.GetAggregateInfo(agg.Func, aggTyps[i]...)
				if err != nil {
					return result, err
				}
				result.ColumnTypes[i] = retType
			}
			typs := make([]*types.T, len(spec.Input[0].ColumnTypes))
			copy(typs, spec.Input[0].ColumnTypes)
			if needHash {
				hashAggregatorMemAccount := streamingMemAccount
				if !useStreamingMemAccountForBuffering {
					// Create an unlimited mem account explicitly even though there is
					// no disk spilling because the memory usage of an aggregator is
					// proportional to the number of groups, not the number of inputs.
					// The row execution engine also gives an unlimited (that still
					// needs to be approved by the upstream monitor, so not really
					// "unlimited") amount of memory to the aggregator.
					hashAggregatorMemAccount = result.createBufferingUnlimitedMemAccount(ctx, flowCtx, "hash-aggregator")
				}
				result.Op, err = NewHashAggregator(
					colmem.NewAllocator(ctx, hashAggregatorMemAccount, factory), inputs[0], typs, aggFns,
					aggSpec.GroupCols, aggCols,
				)
			} else {
				result.Op, err = NewOrderedAggregator(
					streamingAllocator, inputs[0], typs, aggFns,
					aggSpec.GroupCols, aggCols, aggSpec.IsScalar(),
				)
				result.IsStreaming = true
			}

		case core.Distinct != nil:
			if err := checkNumIn(inputs, 1); err != nil {
				return result, err
			}
			result.ColumnTypes = make([]*types.T, len(spec.Input[0].ColumnTypes))
			copy(result.ColumnTypes, spec.Input[0].ColumnTypes)
			if len(core.Distinct.OrderedColumns) == len(core.Distinct.DistinctColumns) {
				result.Op, err = NewOrderedDistinct(inputs[0], core.Distinct.OrderedColumns, result.ColumnTypes)
				result.IsStreaming = true
			} else {
				distinctMemAccount := streamingMemAccount
				if !useStreamingMemAccountForBuffering {
					// Create an unlimited mem account explicitly even though there is
					// no disk spilling because the memory usage of an unordered
					// distinct operator is proportional to the number of distinct
					// tuples, not the number of input tuples.
					// The row execution engine also gives an unlimited (that still
					// needs to be approved by the upstream monitor, so not really
					// "unlimited") amount of memory to the unordered distinct operator.
					distinctMemAccount = result.createBufferingUnlimitedMemAccount(ctx, flowCtx, "distinct")
				}
				// TODO(yuzefovich): we have an implementation of partially ordered
				// distinct, and we should plan it when we have non-empty ordered
				// columns and we think that the probability of distinct tuples in the
				// input is about 0.01 or less.
				result.Op = NewUnorderedDistinct(
					colmem.NewAllocator(ctx, distinctMemAccount, factory), inputs[0],
					core.Distinct.DistinctColumns, result.ColumnTypes, hashTableNumBuckets,
				)
			}

		case core.Ordinality != nil:
			if err := checkNumIn(inputs, 1); err != nil {
				return result, err
			}
			outputIdx := len(spec.Input[0].ColumnTypes)
			result.Op = NewOrdinalityOp(streamingAllocator, inputs[0], outputIdx)
			result.IsStreaming = true
			result.ColumnTypes = appendOneType(spec.Input[0].ColumnTypes, types.Int)

		case core.HashJoiner != nil:
			if err := checkNumIn(inputs, 2); err != nil {
				return result, err
			}
			leftTypes := make([]*types.T, len(spec.Input[0].ColumnTypes))
			copy(leftTypes, spec.Input[0].ColumnTypes)
			rightTypes := make([]*types.T, len(spec.Input[1].ColumnTypes))
			copy(rightTypes, spec.Input[1].ColumnTypes)

			hashJoinerMemMonitorName := fmt.Sprintf("hash-joiner-%d", spec.ProcessorID)
			var hashJoinerMemAccount *mon.BoundAccount
			if useStreamingMemAccountForBuffering {
				hashJoinerMemAccount = streamingMemAccount
			} else {
				hashJoinerMemAccount = result.createMemAccountForSpillStrategy(
					ctx, flowCtx, hashJoinerMemMonitorName,
				)
			}
			// It is valid for an empty set of equality columns to be considered a
			// "key" (for example, when the input has at most 1 row). However, the
			// hash joiner, in order to handle NULL values correctly, needs to
			// think that an empty set of equality columns doesn't form a key.
			rightEqColsAreKey := core.HashJoiner.RightEqColumnsAreKey && len(core.HashJoiner.RightEqColumns) > 0
			hjSpec, err := makeHashJoinerSpec(
				core.HashJoiner.Type,
				core.HashJoiner.LeftEqColumns,
				core.HashJoiner.RightEqColumns,
				leftTypes,
				rightTypes,
				rightEqColsAreKey,
			)
			if err != nil {
				return result, err
			}
			inMemoryHashJoiner := newHashJoiner(
				colmem.NewAllocator(ctx, hashJoinerMemAccount, factory), hjSpec, inputs[0], inputs[1],
			)
			if args.TestingKnobs.DiskSpillingDisabled {
				// We will not be creating a disk-backed hash joiner because we're
				// running a test that explicitly asked for only the in-memory hash
				// joiner.
				result.Op = inMemoryHashJoiner
			} else {
				diskAccount := result.createDiskAccount(ctx, flowCtx, hashJoinerMemMonitorName)
				result.Op = newTwoInputDiskSpiller(
					inputs[0], inputs[1], inMemoryHashJoiner.(bufferingInMemoryOperator),
					hashJoinerMemMonitorName,
					func(inputOne, inputTwo colexecbase.Operator) colexecbase.Operator {
						monitorNamePrefix := "external-hash-joiner"
						unlimitedAllocator := colmem.NewAllocator(
							ctx, result.createBufferingUnlimitedMemAccount(
								ctx, flowCtx, monitorNamePrefix,
							), factory)
						// Make a copy of the DiskQueueCfg and set defaults for the hash
						// joiner. The cache mode is chosen to automatically close the
						// cache belonging to partitions at a parent level when
						// repartitioning.
						diskQueueCfg := args.DiskQueueCfg
						diskQueueCfg.CacheMode = colcontainer.DiskQueueCacheModeClearAndReuseCache
						diskQueueCfg.SetDefaultBufferSizeBytesForCacheMode()
						ehj := newExternalHashJoiner(
							unlimitedAllocator, hjSpec,
							inputOne, inputTwo,
							execinfra.GetWorkMemLimit(flowCtx.Cfg),
							diskQueueCfg,
							args.FDSemaphore,
							func(input colexecbase.Operator, inputTypes []*types.T, orderingCols []execinfrapb.Ordering_Column, maxNumberPartitions int) (colexecbase.Operator, error) {
								sortArgs := args
								if !args.TestingKnobs.DelegateFDAcquisitions {
									// Set the FDSemaphore to nil. This indicates that no FDs
									// should be acquired. The external hash joiner will do
									// this up front.
									sortArgs.FDSemaphore = nil
								}
								return result.createDiskBackedSort(
									ctx, flowCtx, sortArgs, input, inputTypes,
									execinfrapb.Ordering{Columns: orderingCols},
									0 /* matchLen */, maxNumberPartitions, spec.ProcessorID,
									&execinfrapb.PostProcessSpec{}, monitorNamePrefix+"-", factory)
							},
							args.TestingKnobs.NumForcedRepartitions,
							args.TestingKnobs.DelegateFDAcquisitions,
							diskAccount,
						)
						result.ToClose = append(result.ToClose, ehj.(IdempotentCloser))
						return ehj
					},
					args.TestingKnobs.SpillingCallbackFn,
				)
			}
			result.ColumnTypes = make([]*types.T, len(leftTypes)+len(rightTypes))
			copy(result.ColumnTypes, leftTypes)
			if !core.HashJoiner.Type.ShouldIncludeRightColsInOutput() {
				result.ColumnTypes = result.ColumnTypes[:len(leftTypes):len(leftTypes)]
			} else {
				copy(result.ColumnTypes[len(leftTypes):], rightTypes)
			}

			if !core.HashJoiner.OnExpr.Empty() && core.HashJoiner.Type == sqlbase.InnerJoin {
				if err =
					result.planAndMaybeWrapOnExprAsFilter(ctx, flowCtx, core.HashJoiner.OnExpr,
						streamingMemAccount, processorConstructor, factory); err != nil {
					return result, err
				}
			}

		case core.MergeJoiner != nil:
			if err := checkNumIn(inputs, 2); err != nil {
				return result, err
			}
			// The merge joiner is a streaming operator when the equality columns
			// form a key for both of the inputs.
			result.IsStreaming = core.MergeJoiner.LeftEqColumnsAreKey && core.MergeJoiner.RightEqColumnsAreKey

			leftTypes := make([]*types.T, len(spec.Input[0].ColumnTypes))
			copy(leftTypes, spec.Input[0].ColumnTypes)
			rightTypes := make([]*types.T, len(spec.Input[1].ColumnTypes))
			copy(rightTypes, spec.Input[1].ColumnTypes)

			joinType := core.MergeJoiner.Type
			var onExpr *execinfrapb.Expression
			if !core.MergeJoiner.OnExpr.Empty() {
				if joinType != sqlbase.InnerJoin {
					return result, errors.AssertionFailedf(
						"ON expression (%s) was unexpectedly planned for merge joiner with join type %s",
						core.MergeJoiner.OnExpr.String(), core.MergeJoiner.Type.String(),
					)
				}
				onExpr = &core.MergeJoiner.OnExpr
			}

			monitorName := "merge-joiner"
			// We are using an unlimited memory monitor here because the merge
			// joiner itself is responsible for making sure that we stay within
			// the memory limit, and it will fall back to disk if necessary.
			unlimitedAllocator := colmem.NewAllocator(
				ctx, result.createBufferingUnlimitedMemAccount(
					ctx, flowCtx, monitorName,
				), factory)
			diskAccount := result.createDiskAccount(ctx, flowCtx, monitorName)
			mj, err := newMergeJoinOp(
				unlimitedAllocator, execinfra.GetWorkMemLimit(flowCtx.Cfg),
				args.DiskQueueCfg, args.FDSemaphore,
				joinType, inputs[0], inputs[1], leftTypes, rightTypes,
				core.MergeJoiner.LeftOrdering.Columns, core.MergeJoiner.RightOrdering.Columns,
				diskAccount,
			)
			if err != nil {
				return result, err
			}

			result.Op = mj
			result.ToClose = append(result.ToClose, mj.(IdempotentCloser))
			result.ColumnTypes = make([]*types.T, len(leftTypes)+len(rightTypes))
			copy(result.ColumnTypes, leftTypes)
			if !core.MergeJoiner.Type.ShouldIncludeRightColsInOutput() {
				result.ColumnTypes = result.ColumnTypes[:len(leftTypes):len(leftTypes)]
			} else {
				copy(result.ColumnTypes[len(leftTypes):], rightTypes)
			}

			if onExpr != nil {
				if err = result.planAndMaybeWrapOnExprAsFilter(ctx, flowCtx, *onExpr,
					streamingMemAccount, processorConstructor, factory); err != nil {
					return result, err
				}
			}

		case core.Sorter != nil:
			if err := checkNumIn(inputs, 1); err != nil {
				return result, err
			}
			input := inputs[0]
			result.ColumnTypes = make([]*types.T, len(spec.Input[0].ColumnTypes))
			copy(result.ColumnTypes, spec.Input[0].ColumnTypes)
			ordering := core.Sorter.OutputOrdering
			matchLen := core.Sorter.OrderingMatchLen
			result.Op, err = result.createDiskBackedSort(
				ctx, flowCtx, args, input, result.ColumnTypes, ordering, matchLen, 0, /* maxNumberPartitions */
				spec.ProcessorID, post, "" /* memMonitorNamePrefix */, factory,
			)

		case core.Windower != nil:
			if err := checkNumIn(inputs, 1); err != nil {
				return result, err
			}
			memMonitorsPrefix := "window-"
			input := inputs[0]
			result.ColumnTypes = make([]*types.T, len(spec.Input[0].ColumnTypes))
			copy(result.ColumnTypes, spec.Input[0].ColumnTypes)
			for _, wf := range core.Windower.WindowFns {
				// We allocate the capacity for two extra types because of the
				// temporary columns that can be appended below.
				typs := make([]*types.T, len(result.ColumnTypes), len(result.ColumnTypes)+2)
				copy(typs, result.ColumnTypes)
				tempColOffset, partitionColIdx := uint32(0), columnOmitted
				peersColIdx := columnOmitted
				windowFn := *wf.Func.WindowFunc
				if len(core.Windower.PartitionBy) > 0 {
					// TODO(yuzefovich): add support for hashing partitioner (probably by
					// leveraging hash routers once we can distribute). The decision about
					// which kind of partitioner to use should come from the optimizer.
					partitionColIdx = int(wf.OutputColIdx)
					input, err = NewWindowSortingPartitioner(
						streamingAllocator, input, typs,
						core.Windower.PartitionBy, wf.Ordering.Columns, int(wf.OutputColIdx),
						func(input colexecbase.Operator, inputTypes []*types.T, orderingCols []execinfrapb.Ordering_Column) (colexecbase.Operator, error) {
							return result.createDiskBackedSort(
								ctx, flowCtx, args, input, inputTypes,
								execinfrapb.Ordering{Columns: orderingCols}, 0, /* matchLen */
								0 /* maxNumberPartitions */, spec.ProcessorID,
								&execinfrapb.PostProcessSpec{}, memMonitorsPrefix, factory)
						},
					)
					// Window partitioner will append a boolean column.
					tempColOffset++
					typs = typs[:len(typs)+1]
					typs[len(typs)-1] = types.Bool
				} else {
					if len(wf.Ordering.Columns) > 0 {
						input, err = result.createDiskBackedSort(
							ctx, flowCtx, args, input, typs,
							wf.Ordering, 0 /* matchLen */, 0, /* maxNumberPartitions */
							spec.ProcessorID, &execinfrapb.PostProcessSpec{}, memMonitorsPrefix, factory,
						)
					}
				}
				if err != nil {
					return result, err
				}
				if windowFnNeedsPeersInfo(*wf.Func.WindowFunc) {
					peersColIdx = int(wf.OutputColIdx + tempColOffset)
					input, err = NewWindowPeerGrouper(
						streamingAllocator, input, typs, wf.Ordering.Columns,
						partitionColIdx, peersColIdx,
					)
					// Window peer grouper will append a boolean column.
					tempColOffset++
					typs = typs[:len(typs)+1]
					typs[len(typs)-1] = types.Bool
				}

				outputIdx := int(wf.OutputColIdx + tempColOffset)
				switch windowFn {
				case execinfrapb.WindowerSpec_ROW_NUMBER:
					result.Op = NewRowNumberOperator(streamingAllocator, input, outputIdx, partitionColIdx)
				case execinfrapb.WindowerSpec_RANK, execinfrapb.WindowerSpec_DENSE_RANK:
					result.Op, err = NewRankOperator(
						streamingAllocator, input, windowFn, wf.Ordering.Columns,
						outputIdx, partitionColIdx, peersColIdx,
					)
				case execinfrapb.WindowerSpec_PERCENT_RANK, execinfrapb.WindowerSpec_CUME_DIST:
					// We are using an unlimited memory monitor here because
					// relative rank operators themselves are responsible for
					// making sure that we stay within the memory limit, and
					// they will fall back to disk if necessary.
					memAccName := memMonitorsPrefix + "relative-rank"
					unlimitedAllocator := colmem.NewAllocator(
						ctx, result.createBufferingUnlimitedMemAccount(ctx, flowCtx, memAccName), factory,
					)
					diskAcc := result.createDiskAccount(ctx, flowCtx, memAccName)
					result.Op, err = NewRelativeRankOperator(
						unlimitedAllocator, execinfra.GetWorkMemLimit(flowCtx.Cfg), args.DiskQueueCfg,
						args.FDSemaphore, input, typs, windowFn, wf.Ordering.Columns,
						outputIdx, partitionColIdx, peersColIdx, diskAcc,
					)
					// NewRelativeRankOperator sometimes returns a constOp when there
					// are no ordering columns, so we check that the returned operator
					// is an IdempotentCloser.
					if c, ok := result.Op.(IdempotentCloser); ok {
						result.ToClose = append(result.ToClose, c)
					}
				default:
					return result, errors.AssertionFailedf("window function %s is not supported", wf.String())
				}

				if tempColOffset > 0 {
					// We want to project out the temporary columns (which have indices
					// in the range [wf.OutputColIdx, wf.OutputColIdx+tempColOffset)).
					projection := make([]uint32, 0, wf.OutputColIdx+tempColOffset)
					for i := uint32(0); i < wf.OutputColIdx; i++ {
						projection = append(projection, i)
					}
					projection = append(projection, wf.OutputColIdx+tempColOffset)
					result.Op = NewSimpleProjectOp(result.Op, int(wf.OutputColIdx+tempColOffset), projection)
				}

				_, returnType, err := execinfrapb.GetWindowFunctionInfo(wf.Func, []*types.T{}...)
				if err != nil {
					return result, err
				}
				result.ColumnTypes = appendOneType(result.ColumnTypes, returnType)
				input = result.Op
			}

		default:
			return result, errors.Newf("unsupported processor core %q", core)
		}
	}

	if err != nil {
		return result, err
	}

	// After constructing the base operator, calculate its internal memory usage.
	if sMem, ok := result.Op.(InternalMemoryOperator); ok {
		result.InternalMemUsage += sMem.InternalMemoryUsage()
	}
	log.VEventf(ctx, 1, "made op %T\n", result.Op)

	// Note: at this point, it is legal for ColumnTypes to be empty (it is
	// legal for empty rows to be passed between processors).

	ppr := postProcessResult{
		Op:          result.Op,
		ColumnTypes: result.ColumnTypes,
	}
	err = ppr.planPostProcessSpec(ctx, flowCtx, post, streamingMemAccount, factory)
	// TODO(yuzefovich): update unit tests to remove panic-catcher when fallback
	// to rowexec is not allowed.
	if err != nil && processorConstructor == nil {
		// Do not attempt to wrap as a row source if there is no
		// processorConstructor because it would fail.
		return result, err
	}

	if err != nil {
		log.VEventf(
			ctx, 2,
			"vectorized post process planning failed with error %v post spec is %s, attempting to wrap as a row source",
			err, post,
		)
		if core.TableReader != nil {
			// We cannot naively wrap a TableReader's post-processing spec since it
			// might project out unneeded columns that are of unsupported types.
			// These columns are still returned, either as coltypes.Unhandled if the
			// type is unsupported, or as an empty column of a supported type. If we
			// were to wrap an unsupported post-processing spec, a Materializer
			// would naively decode these columns, which would return errors (e.g.
			// UUIDs require 16 bytes, coltypes.Unhandled may not be decoded).
			inputTypes := make([][]*types.T, len(spec.Input))
			for inputIdx, input := range spec.Input {
				inputTypes[inputIdx] = make([]*types.T, len(input.ColumnTypes))
				copy(inputTypes[inputIdx], input.ColumnTypes)
			}
			result.resetToState(ctx, resultPreSpecPlanningStateShallowCopy)
			err = result.createAndWrapRowSource(
				ctx, flowCtx, inputs, inputTypes, streamingMemAccount, spec, processorConstructor, factory,
			)
			if err != nil {
				// There was an error wrapping the TableReader.
				return result, err
			}
		} else {
			err = result.wrapPostProcessSpec(ctx, flowCtx, post, streamingMemAccount, processorConstructor, factory)
		}
	} else {
		// The result can be updated with the post process result.
		result.updateWithPostProcessResult(ppr)
	}
	return result, err
}
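
// Illustrative sketch of how a caller might drive NewColOperator (the field
// values and the rowexec.NewProcessor constructor are assumptions here; the
// real call site is the vectorized flow setup, which is not part of this
// file):
//
//	args := NewColOperatorArgs{
//		Spec:                 processorSpec,
//		Inputs:               inputOperators,
//		StreamingMemAccount:  &streamingAcc,
//		ProcessorConstructor: rowexec.NewProcessor,
//		DiskQueueCfg:         diskQueueCfg,
//		FDSemaphore:          fdSemaphore,
//	}
//	result, err := NewColOperator(ctx, flowCtx, args)
//	if err != nil {
//		// Accounts and monitors opened during planning have already been
//		// cleaned up by NewColOperator's deferred handler.
//	}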

// planAndMaybeWrapOnExprAsFilter plans a joiner ON expression as a filter. If
// the filter is unsupported, it is planned as a wrapped noop processor with
// the filter as a post-processing stage.
func (r *NewColOperatorResult) planAndMaybeWrapOnExprAsFilter(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	onExpr execinfrapb.Expression,
	streamingMemAccount *mon.BoundAccount,
	processorConstructor execinfra.ProcessorConstructor,
	factory coldata.ColumnFactory,
) error {
	// We will plan other Operators on top of r.Op, so we need to account for the
	// internal memory explicitly.
	if internalMemOp, ok := r.Op.(InternalMemoryOperator); ok {
		r.InternalMemUsage += internalMemOp.InternalMemoryUsage()
	}
	ppr := postProcessResult{
		Op:          r.Op,
		ColumnTypes: r.ColumnTypes,
	}
	if err := ppr.planFilterExpr(
		ctx, flowCtx.NewEvalCtx(), onExpr, streamingMemAccount, factory,
	); err != nil {
		// ON expression planning failed. Fall back to planning the filter
		// using row execution.
		log.VEventf(
			ctx, 2,
			"vectorized join ON expr planning failed with error %v ON expr is %s, attempting to wrap as a row source",
			err, onExpr.String(),
		)

		onExprAsFilter := &execinfrapb.PostProcessSpec{Filter: onExpr}
		return r.wrapPostProcessSpec(ctx, flowCtx, onExprAsFilter, streamingMemAccount, processorConstructor, factory)
	}
	r.updateWithPostProcessResult(ppr)
	return nil
}

// wrapPostProcessSpec plans the given post process spec by wrapping a noop
// processor with that output spec. This is used to fall back to row execution
// when encountering unsupported post processing specs. An error is returned
// if the wrapping failed. A reason for this could be an unsupported type, in
// which case the row execution engine is used fully.
func (r *NewColOperatorResult) wrapPostProcessSpec(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	post *execinfrapb.PostProcessSpec,
	streamingMemAccount *mon.BoundAccount,
	processorConstructor execinfra.ProcessorConstructor,
	factory coldata.ColumnFactory,
) error {
	noopSpec := &execinfrapb.ProcessorSpec{
		Core: execinfrapb.ProcessorCoreUnion{
			Noop: &execinfrapb.NoopCoreSpec{},
		},
		Post: *post,
	}
	return r.createAndWrapRowSource(
		ctx, flowCtx, []colexecbase.Operator{r.Op}, [][]*types.T{r.ColumnTypes},
		streamingMemAccount, noopSpec, processorConstructor, factory,
	)
}

// planPostProcessSpec plans the post processing stage specified in post on top
// of r.Op.
func (r *postProcessResult) planPostProcessSpec(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	post *execinfrapb.PostProcessSpec,
	streamingMemAccount *mon.BoundAccount,
	factory coldata.ColumnFactory,
) error {
	if !post.Filter.Empty() {
		if err := r.planFilterExpr(
			ctx, flowCtx.NewEvalCtx(), post.Filter, streamingMemAccount, factory,
		); err != nil {
			return err
		}
	}

	if post.Projection {
		r.addProjection(post.OutputColumns)
	} else if post.RenderExprs != nil {
		log.VEventf(ctx, 2, "planning render expressions %+v", post.RenderExprs)
		var renderedCols []uint32
		for _, expr := range post.RenderExprs {
			var (
				helper            execinfra.ExprHelper
				renderInternalMem int
			)
			err := helper.Init(expr, r.ColumnTypes, flowCtx.EvalCtx)
			if err != nil {
				return err
			}
			var outputIdx int
			r.Op, outputIdx, r.ColumnTypes, renderInternalMem, err = planProjectionOperators(
				ctx, flowCtx.NewEvalCtx(), helper.Expr, r.ColumnTypes, r.Op, streamingMemAccount, factory,
			)
			if err != nil {
				return errors.Wrapf(err, "unable to columnarize render expression %q", expr)
			}
			if outputIdx < 0 {
				return errors.AssertionFailedf("missing outputIdx")
			}
			r.InternalMemUsage += renderInternalMem
			renderedCols = append(renderedCols, uint32(outputIdx))
		}
		r.Op = NewSimpleProjectOp(r.Op, len(r.ColumnTypes), renderedCols)
		newTypes := make([]*types.T, len(renderedCols))
		for i, j := range renderedCols {
			newTypes[i] = r.ColumnTypes[j]
		}
		r.ColumnTypes = newTypes
	}
	if post.Offset != 0 {
		r.Op = NewOffsetOp(r.Op, int(post.Offset))
	}
	if post.Limit != 0 {
		r.Op = NewLimitOp(r.Op, int(post.Limit))
	}
	return nil
}

// createBufferingUnlimitedMemMonitor instantiates an unlimited memory monitor.
// These should only be used when spilling to disk and an operator is made aware
// of a memory usage limit separately.
// The receiver is updated to have a reference to the unlimited memory monitor.
func (r *NewColOperatorResult) createBufferingUnlimitedMemMonitor(
	ctx context.Context, flowCtx *execinfra.FlowCtx, name string,
) *mon.BytesMonitor {
	bufferingOpUnlimitedMemMonitor := execinfra.NewMonitor(
		ctx, flowCtx.EvalCtx.Mon, name+"-unlimited",
	)
	r.OpMonitors = append(r.OpMonitors, bufferingOpUnlimitedMemMonitor)
	return bufferingOpUnlimitedMemMonitor
}

// createMemAccountForSpillStrategy instantiates a memory monitor and a memory
// account to be used with a buffering Operator that can fall back to disk.
// The default memory limit is used, but if flowCtx.Cfg.ForceDiskSpill is set,
// the limit will be 1. The receiver is updated to have references to both
// objects.
func (r *NewColOperatorResult) createMemAccountForSpillStrategy(
	ctx context.Context, flowCtx *execinfra.FlowCtx, name string,
) *mon.BoundAccount {
	bufferingOpMemMonitor := execinfra.NewLimitedMonitor(
		ctx, flowCtx.EvalCtx.Mon, flowCtx.Cfg, name+"-limited",
	)
	r.OpMonitors = append(r.OpMonitors, bufferingOpMemMonitor)
	bufferingMemAccount := bufferingOpMemMonitor.MakeBoundAccount()
	r.OpAccounts = append(r.OpAccounts, &bufferingMemAccount)
	return &bufferingMemAccount
}

// createBufferingUnlimitedMemAccount instantiates an unlimited memory monitor
// and a memory account to be used with a buffering disk-backed Operator. The
// receiver is updated to have references to both objects. Note that the
// returned account is only "unlimited" in that it does not have a hard limit
// that it enforces, but a limit might be enforced by a root monitor.
func (r *NewColOperatorResult) createBufferingUnlimitedMemAccount(
	ctx context.Context, flowCtx *execinfra.FlowCtx, name string,
) *mon.BoundAccount {
	bufferingOpUnlimitedMemMonitor := r.createBufferingUnlimitedMemMonitor(ctx, flowCtx, name)
	bufferingMemAccount := bufferingOpUnlimitedMemMonitor.MakeBoundAccount()
	r.OpAccounts = append(r.OpAccounts, &bufferingMemAccount)
	return &bufferingMemAccount
}
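
// Summary of the four accounting helpers in this file:
//   - createMemAccountForSpillStrategy: limited monitor; hitting the limit is
//     the signal for a buffering operator to spill to disk.
//   - createBufferingUnlimitedMemAccount: no hard local limit; the operator
//     enforces the memory limit itself, and a root monitor still applies.
//   - createStandaloneMemAccount (below): standalone budget that is not
//     reported to the root monitor at all.
//   - createDiskAccount (below): tracks disk, not memory, for spilling
//     operators.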

// createStandaloneMemAccount instantiates an unlimited memory monitor and a
// memory account that have a standalone budget. This means that the memory
// registered with these objects is *not* reported to the root monitor (i.e.
// it will not count towards max-sql-memory). Use it only when the memory in
// use is accounted for with a different memory monitor. The receiver is
// updated to have references to both objects.
func (r *NewColOperatorResult) createStandaloneMemAccount(
	ctx context.Context, flowCtx *execinfra.FlowCtx, name string,
) *mon.BoundAccount {
	standaloneMemMonitor := mon.MakeMonitor(
		name+"-standalone",
		mon.MemoryResource,
		nil,           /* curCount */
		nil,           /* maxHist */
		-1,            /* increment: use default increment */
		math.MaxInt64, /* noteworthy */
		flowCtx.Cfg.Settings,
	)
	r.OpMonitors = append(r.OpMonitors, &standaloneMemMonitor)
	standaloneMemMonitor.Start(ctx, nil, mon.MakeStandaloneBudget(math.MaxInt64))
	standaloneMemAccount := standaloneMemMonitor.MakeBoundAccount()
	r.OpAccounts = append(r.OpAccounts, &standaloneMemAccount)
	return &standaloneMemAccount
}

// createDiskAccount instantiates an unlimited disk monitor and a disk account
// to be used for the disk spilling infrastructure in the vectorized engine.
// TODO(azhng): consolidate all allocation monitor/account management into one
// place after the branch cut for 20.1.
func (r *NewColOperatorResult) createDiskAccount(
	ctx context.Context, flowCtx *execinfra.FlowCtx, name string,
) *mon.BoundAccount {
	opDiskMonitor := execinfra.NewMonitor(ctx, flowCtx.Cfg.DiskMonitor, name)
	r.OpMonitors = append(r.OpMonitors, opDiskMonitor)
	opDiskAccount := opDiskMonitor.MakeBoundAccount()
	r.OpAccounts = append(r.OpAccounts, &opDiskAccount)
	return &opDiskAccount
}

type postProcessResult struct {
	Op               colexecbase.Operator
	ColumnTypes      []*types.T
	InternalMemUsage int
}

func (r *NewColOperatorResult) updateWithPostProcessResult(ppr postProcessResult) {
	r.Op = ppr.Op
	r.ColumnTypes = make([]*types.T, len(ppr.ColumnTypes))
	copy(r.ColumnTypes, ppr.ColumnTypes)
	r.InternalMemUsage += ppr.InternalMemUsage
}

func (r *postProcessResult) planFilterExpr(
	ctx context.Context,
	evalCtx *tree.EvalContext,
	filter execinfrapb.Expression,
	acc *mon.BoundAccount,
	factory coldata.ColumnFactory,
) error {
	var (
		helper               execinfra.ExprHelper
		selectionInternalMem int
	)
	err := helper.Init(filter, r.ColumnTypes, evalCtx)
	if err != nil {
		return err
	}
	if helper.Expr == tree.DNull {
		// The filter expression is tree.DNull meaning that it is always false, so
		// we put a zero operator.
		r.Op = NewZeroOp(r.Op)
		return nil
	}
	var filterColumnTypes []*types.T
	r.Op, _, filterColumnTypes, selectionInternalMem, err = planSelectionOperators(
		ctx, evalCtx, helper.Expr, r.ColumnTypes, r.Op, acc, factory,
	)
	if err != nil {
		return errors.Wrapf(err, "unable to columnarize filter expression %q", filter.Expr)
	}
	r.InternalMemUsage += selectionInternalMem
	if len(filterColumnTypes) > len(r.ColumnTypes) {
		// Additional columns were appended to store projections while evaluating
		// the filter. Project them away.
		var outputColumns []uint32
		for i := range r.ColumnTypes {
			outputColumns = append(outputColumns, uint32(i))
		}
		r.Op = NewSimpleProjectOp(r.Op, len(filterColumnTypes), outputColumns)
	}
	return nil
}
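
// Worked example for the projection helpers here (hypothetical schema): with
// ColumnTypes = {Bool, Int, Float}, addProjection([]uint32{2, 0}) keeps only
// columns 2 and 0, in that order, so ColumnTypes becomes {Float, Bool}.
// Likewise, planFilterExpr above projects away any columns that selection
// planning appended beyond the original schema, which avoids the aliasing
// hazard described in the NOTE near the top of this file.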
1407 newTypes := make([]*types.T, len(projection)) 1408 for i, j := range projection { 1409 newTypes[i] = r.ColumnTypes[j] 1410 } 1411 r.ColumnTypes = newTypes 1412 } 1413 1414 func planSelectionOperators( 1415 ctx context.Context, 1416 evalCtx *tree.EvalContext, 1417 expr tree.TypedExpr, 1418 columnTypes []*types.T, 1419 input colexecbase.Operator, 1420 acc *mon.BoundAccount, 1421 factory coldata.ColumnFactory, 1422 ) (op colexecbase.Operator, resultIdx int, typs []*types.T, internalMemUsed int, err error) { 1423 switch t := expr.(type) { 1424 case *tree.IndexedVar: 1425 op, err = boolOrUnknownToSelOp(input, columnTypes, t.Idx) 1426 return op, -1, columnTypes, internalMemUsed, err 1427 case *tree.AndExpr: 1428 // AND expressions are handled by an implicit AND'ing of selection vectors. 1429 // First we select out the tuples that are true on the left side, and then, 1430 // only among the matched tuples, we select out the tuples that are true on 1431 // the right side. 1432 var leftOp, rightOp colexecbase.Operator 1433 var internalMemUsedLeft, internalMemUsedRight int 1434 leftOp, _, typs, internalMemUsedLeft, err = planSelectionOperators( 1435 ctx, evalCtx, t.TypedLeft(), columnTypes, input, acc, factory, 1436 ) 1437 if err != nil { 1438 return nil, resultIdx, typs, internalMemUsed, err 1439 } 1440 rightOp, resultIdx, typs, internalMemUsedRight, err = planSelectionOperators( 1441 ctx, evalCtx, t.TypedRight(), typs, leftOp, acc, factory, 1442 ) 1443 return rightOp, resultIdx, typs, internalMemUsedLeft + internalMemUsedRight, err 1444 case *tree.OrExpr: 1445 // OR expressions are handled by converting them to an equivalent CASE 1446 // statement. Since CASE statements don't have a selection form, plan a 1447 // projection and then convert the resulting boolean to a selection vector. 1448 // 1449 // Rewrite the OR expression as an equivalent CASE expression. 1450 // "a OR b" becomes "CASE WHEN a THEN true WHEN b THEN true ELSE false END". 1451 // This way we can take advantage of the short-circuiting logic built into 1452 // the CASE operator. (b should not be evaluated if a is true.) 
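// For example, the filter `a = 1 OR b = 2` is rewritten below into
//   CASE WHEN a = 1 THEN true WHEN b = 2 THEN true ELSE false END
// which is planned as a projection and then converted into a selection
// vector by boolOrUnknownToSelOp.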
1453 caseExpr, err := tree.NewTypedCaseExpr( 1454 nil, /* expr */ 1455 []*tree.When{ 1456 {Cond: t.Left, Val: tree.DBoolTrue}, 1457 {Cond: t.Right, Val: tree.DBoolTrue}, 1458 }, 1459 tree.DBoolFalse, 1460 types.Bool) 1461 if err != nil { 1462 return nil, resultIdx, typs, internalMemUsed, err 1463 } 1464 op, resultIdx, typs, internalMemUsed, err = planProjectionOperators( 1465 ctx, evalCtx, caseExpr, columnTypes, input, acc, factory, 1466 ) 1467 if err != nil { 1468 return nil, resultIdx, typs, internalMemUsed, err 1469 } 1470 op, err = boolOrUnknownToSelOp(op, typs, resultIdx) 1471 return op, resultIdx, typs, internalMemUsed, err 1472 case *tree.CaseExpr: 1473 op, resultIdx, typs, internalMemUsed, err = planProjectionOperators( 1474 ctx, evalCtx, expr, columnTypes, input, acc, factory, 1475 ) 1476 if err != nil { 1477 return op, resultIdx, typs, internalMemUsed, err 1478 } 1479 op, err = boolOrUnknownToSelOp(op, typs, resultIdx) 1480 return op, resultIdx, typs, internalMemUsed, err 1481 case *tree.IsNullExpr: 1482 op, resultIdx, typs, internalMemUsed, err = planProjectionOperators( 1483 ctx, evalCtx, t.TypedInnerExpr(), columnTypes, input, acc, factory, 1484 ) 1485 op = newIsNullSelOp(op, resultIdx, false) 1486 return op, resultIdx, typs, internalMemUsed, err 1487 case *tree.IsNotNullExpr: 1488 op, resultIdx, typs, internalMemUsed, err = planProjectionOperators( 1489 ctx, evalCtx, t.TypedInnerExpr(), columnTypes, input, acc, factory, 1490 ) 1491 op = newIsNullSelOp(op, resultIdx, true) 1492 return op, resultIdx, typs, internalMemUsed, err 1493 case *tree.ComparisonExpr: 1494 cmpOp := t.Operator 1495 leftOp, leftIdx, ct, internalMemUsedLeft, err := planProjectionOperators( 1496 ctx, evalCtx, t.TypedLeft(), columnTypes, input, acc, factory, 1497 ) 1498 if err != nil { 1499 return nil, resultIdx, ct, internalMemUsed, err 1500 } 1501 lTyp := ct[leftIdx] 1502 if constArg, ok := t.Right.(tree.Datum); ok { 1503 if t.Operator == tree.Like || t.Operator == tree.NotLike { 1504 negate := t.Operator == tree.NotLike 1505 op, err = GetLikeOperator( 1506 evalCtx, leftOp, leftIdx, string(tree.MustBeDString(constArg)), negate) 1507 return op, resultIdx, ct, internalMemUsedLeft, err 1508 } 1509 if t.Operator == tree.In || t.Operator == tree.NotIn { 1510 negate := t.Operator == tree.NotIn 1511 datumTuple, ok := tree.AsDTuple(constArg) 1512 if !ok { 1513 err = errors.Errorf("IN is only supported for constant expressions") 1514 return nil, resultIdx, ct, internalMemUsed, err 1515 } 1516 op, err = GetInOperator(lTyp, leftOp, leftIdx, datumTuple, negate) 1517 return op, resultIdx, ct, internalMemUsedLeft, err 1518 } 1519 if t.Operator == tree.IsDistinctFrom || t.Operator == tree.IsNotDistinctFrom { 1520 if t.Right != tree.DNull { 1521 err = errors.Errorf("IS DISTINCT FROM and IS NOT DISTINCT FROM are supported only with NULL argument") 1522 return nil, resultIdx, ct, internalMemUsed, err 1523 } 1524 // IS NOT DISTINCT FROM NULL is synonymous with IS NULL and IS 1525 // DISTINCT FROM NULL is synonymous with IS NOT NULL (except for 1526 // tuples). Therefore, negate when the operator is IS DISTINCT 1527 // FROM NULL. 
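// For example, `x IS DISTINCT FROM NULL` selects exactly the tuples in
// which x is not NULL, hence negate=true below.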
1528 negate := t.Operator == tree.IsDistinctFrom
1529 op = newIsNullSelOp(leftOp, leftIdx, negate)
1530 return op, resultIdx, ct, internalMemUsedLeft, err
1531 }
1532 op, err := GetSelectionConstOperator(
1533 lTyp, t.TypedRight().ResolvedType(), cmpOp, leftOp, leftIdx,
1534 constArg, overloadHelper{},
1535 )
1536 return op, resultIdx, ct, internalMemUsedLeft, err
1537 }
1538 rightOp, rightIdx, ct, internalMemUsedRight, err := planProjectionOperators(
1539 ctx, evalCtx, t.TypedRight(), ct, leftOp, acc, factory,
1540 )
1541 if err != nil {
1542 return nil, resultIdx, ct, internalMemUsed, err
1543 }
1544 op, err := GetSelectionOperator(
1545 lTyp, ct[rightIdx], cmpOp, rightOp, leftIdx, rightIdx,
1546 overloadHelper{},
1547 )
1548 return op, resultIdx, ct, internalMemUsedLeft + internalMemUsedRight, err
1549 default:
1550 return nil, resultIdx, nil, internalMemUsed, errors.Errorf("unhandled selection expression type: %s", reflect.TypeOf(t))
1551 }
1552 }
1553 
1554 func checkCastSupported(fromType, toType *types.T) error {
1555 switch toType.Family() {
1556 case types.DecimalFamily:
1557 // If we're casting to a decimal, we only allow casting from a decimal
1558 // of the same precision because we lose the precision information
1559 // once we start operating on coltypes.T. For such casts we fall back
1560 // to the row-by-row engine.
1561 // TODO(yuzefovich): the coltypes.T type system has been removed;
1562 // reevaluate the situation.
1563 if !fromType.Identical(toType) {
1564 return errors.New("decimal casts with rounding unsupported")
1565 }
1566 }
1567 return nil
1568 }
1569 
1570 // planCastOperator plans a CAST operator that casts the column at index
1571 // 'inputIdx' (of type 'fromType') coming from the input into a column of
1572 // type 'toType' that will be output at index 'resultIdx'.
1573 func planCastOperator(
1574 ctx context.Context,
1575 acc *mon.BoundAccount,
1576 columnTypes []*types.T,
1577 input colexecbase.Operator,
1578 inputIdx int,
1579 fromType *types.T,
1580 toType *types.T,
1581 factory coldata.ColumnFactory,
1582 ) (op colexecbase.Operator, resultIdx int, typs []*types.T, err error) {
1583 if err := checkCastSupported(fromType, toType); err != nil {
1584 return op, resultIdx, typs, err
1585 }
1586 outputIdx := len(columnTypes)
1587 op, err = GetCastOperator(colmem.NewAllocator(ctx, acc, factory), input, inputIdx, outputIdx, fromType, toType)
1588 typs = appendOneType(columnTypes, toType)
1589 return op, outputIdx, typs, err
1590 }
1591 
1592 // planProjectionOperators plans a chain of operators to execute the provided
1593 // expression. It returns the tail of the chain, as well as the column index
1594 // of the expression's result (if any, otherwise -1) and the column types of the
1595 // resulting batches.
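// For example, planning `a + 1` against a batch with schema [INT8] (where a
// is column 0) appends a new output column for the sum, so the function
// would return resultIdx = 1 and typs = [INT8, INT8].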
1596 func planProjectionOperators(
1597 ctx context.Context,
1598 evalCtx *tree.EvalContext,
1599 expr tree.TypedExpr,
1600 columnTypes []*types.T,
1601 input colexecbase.Operator,
1602 acc *mon.BoundAccount,
1603 factory coldata.ColumnFactory,
1604 ) (op colexecbase.Operator, resultIdx int, typs []*types.T, internalMemUsed int, err error) {
1605 resultIdx = -1
1606 switch t := expr.(type) {
1607 case *tree.IndexedVar:
1608 return input, t.Idx, columnTypes, internalMemUsed, nil
1609 case *tree.ComparisonExpr:
1610 return planProjectionExpr(
1611 ctx, evalCtx, t.Operator, t.ResolvedType(), t.TypedLeft(), t.TypedRight(),
1612 columnTypes, input, acc, factory, overloadHelper{},
1613 )
1614 case *tree.BinaryExpr:
1615 if err = checkSupportedBinaryExpr(t.TypedLeft(), t.TypedRight(), t.ResolvedType()); err != nil {
1616 return op, resultIdx, typs, internalMemUsed, err
1617 }
1618 return planProjectionExpr(
1619 ctx, evalCtx, t.Operator, t.ResolvedType(), t.TypedLeft(), t.TypedRight(),
1620 columnTypes, input, acc, factory, overloadHelper{binFn: t.Fn},
1621 )
1622 case *tree.IsNullExpr:
1624 return planIsNullProjectionOp(ctx, evalCtx, t.ResolvedType(), t.TypedInnerExpr(), columnTypes, input, acc, false /* negate */, factory)
1625 case *tree.IsNotNullExpr:
1626 return planIsNullProjectionOp(ctx, evalCtx, t.ResolvedType(), t.TypedInnerExpr(), columnTypes, input, acc, true /* negate */, factory)
1627 case *tree.CastExpr:
1628 expr := t.Expr.(tree.TypedExpr)
1629 op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
1630 ctx, evalCtx, expr, columnTypes, input, acc, factory,
1631 )
1632 if err != nil {
1633 return nil, 0, nil, internalMemUsed, err
1634 }
1635 op, resultIdx, typs, err = planCastOperator(ctx, acc, typs, op, resultIdx, expr.ResolvedType(), t.ResolvedType(), factory)
1636 return op, resultIdx, typs, internalMemUsed, err
1637 case *tree.FuncExpr:
1638 var (
1639 inputCols []int
1640 projectionInternalMem int
1641 )
1642 typs = make([]*types.T, len(columnTypes))
1643 copy(typs, columnTypes)
1644 op = input
1645 for _, e := range t.Exprs {
1646 var err error
1647 // TODO(rohany): This could be done better, especially in the case of
1648 // constant arguments, because the vectorized engine right now
1649 // creates a new column full of the constant value.
1650 op, resultIdx, typs, projectionInternalMem, err = planProjectionOperators(
1651 ctx, evalCtx, e.(tree.TypedExpr), typs, op, acc, factory,
1652 )
1653 if err != nil {
1654 return nil, resultIdx, nil, internalMemUsed, err
1655 }
1656 inputCols = append(inputCols, resultIdx)
1657 internalMemUsed += projectionInternalMem
1658 }
1659 resultIdx = len(typs)
1660 op, err = NewBuiltinFunctionOperator(
1661 colmem.NewAllocator(ctx, acc, factory), evalCtx, t, typs, inputCols, resultIdx, op,
1662 )
1663 typs = appendOneType(typs, t.ResolvedType())
1664 return op, resultIdx, typs, internalMemUsed, err
1665 case tree.Datum:
1666 datumType := t.ResolvedType()
1667 resultIdx = len(columnTypes)
1668 typs = appendOneType(columnTypes, datumType)
1669 if datumType.Family() == types.UnknownFamily {
1670 // We handle the Unknown type by planning a special constant null
1671 // operator.
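// For example, a bare NULL in a projection list is typed as Unknown and
// takes this path: the appended column at resultIdx contains only NULLs.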
1672 op = NewConstNullOp(colmem.NewAllocator(ctx, acc, factory), input, resultIdx)
1673 return op, resultIdx, typs, internalMemUsed, nil
1674 }
1675 constVal, err := getDatumToPhysicalFn(datumType)(t)
1676 if err != nil {
1677 return nil, resultIdx, typs, internalMemUsed, err
1678 }
1679 op, err := NewConstOp(colmem.NewAllocator(ctx, acc, factory), input, datumType, constVal, resultIdx)
1680 if err != nil {
1681 return nil, resultIdx, typs, internalMemUsed, err
1682 }
1683 return op, resultIdx, typs, internalMemUsed, nil
1684 case *tree.CaseExpr:
1685 if t.Expr != nil {
1686 return nil, resultIdx, typs, internalMemUsed, errors.New("CASE <expr> WHEN expressions unsupported")
1687 }
1688 
1689 allocator := colmem.NewAllocator(ctx, acc, factory)
1690 caseOutputType := t.ResolvedType()
1691 if typeconv.TypeFamilyToCanonicalTypeFamily(caseOutputType.Family()) == types.BytesFamily {
1692 // Currently, there is a contradiction between the way the CASE operator
1693 // works (it populates its output in arbitrary order) and the flat bytes
1694 // implementation of the Bytes type (it prohibits sets in arbitrary order),
1695 // so we reject such a scenario and fall back to the row-by-row engine.
1696 return nil, resultIdx, typs, internalMemUsed, errors.Newf(
1697 "unsupported type %s in CASE operator", caseOutputType)
1698 }
1699 caseOutputIdx := len(columnTypes)
1700 // We don't know the schema yet and will update it below, right before
1701 // instantiating caseOp. The same goes for subsetEndIdx.
1702 schemaEnforcer := newBatchSchemaSubsetEnforcer(
1703 allocator, input, nil /* typs */, caseOutputIdx, -1, /* subsetEndIdx */
1704 )
1705 buffer := NewBufferOp(schemaEnforcer)
1706 caseOps := make([]colexecbase.Operator, len(t.Whens))
1707 typs = appendOneType(columnTypes, caseOutputType)
1708 thenIdxs := make([]int, len(t.Whens)+1)
1709 for i, when := range t.Whens {
1710 // The case operator is assembled from n WHEN arms, n THEN arms, and an
1711 // ELSE arm. Each WHEN arm is a boolean projection. Each THEN arm (and the
1712 // ELSE arm) is a projection of the type of the CASE expression. We set up
1713 // each WHEN arm to write its output to a fresh column, and likewise for
1714 // the THEN arms and the ELSE arm. Each WHEN arm individually acts on the
1715 // single input batch from the CaseExpr's input and is then transformed
1716 // into a selection vector, after which the THEN arm runs to create the
1717 // output just for the tuples that matched the WHEN arm. Each subsequent
1718 // WHEN arm will use the inverse of the selection vector to avoid running
1719 // the WHEN projection on tuples that have already been matched by a
1720 // previous WHEN arm. Finally, after each WHEN arm runs, we copy the
1721 // results of the WHEN into a single output vector, assembling the final
1722 // result of the case projection.
1723 whenTyped := when.Cond.(tree.TypedExpr)
1724 var whenInternalMemUsed, thenInternalMemUsed int
1725 caseOps[i], resultIdx, typs, whenInternalMemUsed, err = planProjectionOperators(
1726 ctx, evalCtx, whenTyped, typs, buffer, acc, factory,
1727 )
1728 if err != nil {
1729 return nil, resultIdx, typs, internalMemUsed, err
1730 }
1731 caseOps[i], err = boolOrUnknownToSelOp(caseOps[i], typs, resultIdx)
1732 if err != nil {
1733 return nil, resultIdx, typs, internalMemUsed, err
1734 }
1735 
1736 // Run the "then" clause on those tuples that were selected.
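// For example, in CASE WHEN a > 0 THEN a + 1 END, the WHEN arm above has
// produced a selection vector picking out the tuples with a > 0, and the
// call below plans `a + 1` over just those selected tuples.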
1737 caseOps[i], thenIdxs[i], typs, thenInternalMemUsed, err = planProjectionOperators(
1738 ctx, evalCtx, when.Val.(tree.TypedExpr), typs, caseOps[i], acc, factory,
1739 )
1740 if err != nil {
1741 return nil, resultIdx, typs, internalMemUsed, err
1742 }
1743 internalMemUsed += whenInternalMemUsed + thenInternalMemUsed
1744 if !typs[thenIdxs[i]].Identical(typs[caseOutputIdx]) {
1745 // It is possible that the projection of this THEN arm has a different
1746 // column type (for example, we expect INT2, but INT8 is given). In
1747 // such a case, we need to plan a cast.
1748 fromType, toType := typs[thenIdxs[i]], typs[caseOutputIdx]
1749 caseOps[i], thenIdxs[i], typs, err = planCastOperator(
1750 ctx, acc, typs, caseOps[i], thenIdxs[i], fromType, toType, factory,
1751 )
1752 if err != nil {
1753 return nil, resultIdx, typs, internalMemUsed, err
1754 }
1755 }
1756 }
1757 var elseInternalMemUsed int
1758 var elseOp colexecbase.Operator
1759 elseExpr := t.Else
1760 if elseExpr == nil {
1761 // If there's no ELSE arm, we write NULLs.
1762 elseExpr = tree.DNull
1763 }
1764 elseOp, thenIdxs[len(t.Whens)], typs, elseInternalMemUsed, err = planProjectionOperators(
1765 ctx, evalCtx, elseExpr.(tree.TypedExpr), typs, buffer, acc, factory,
1766 )
1767 if err != nil {
1768 return nil, resultIdx, typs, internalMemUsed, err
1769 }
1770 internalMemUsed += elseInternalMemUsed
1771 if !typs[thenIdxs[len(t.Whens)]].Identical(typs[caseOutputIdx]) {
1772 // It is possible that the projection of the ELSE arm has a different
1773 // column type (for example, we expect INT2, but INT8 is given). In
1774 // such a case, we need to plan a cast.
1775 elseIdx := thenIdxs[len(t.Whens)]
1776 fromType, toType := typs[elseIdx], typs[caseOutputIdx]
1777 elseOp, thenIdxs[len(t.Whens)], typs, err = planCastOperator(
1778 ctx, acc, typs, elseOp, elseIdx, fromType, toType, factory,
1779 )
1780 if err != nil {
1781 return nil, resultIdx, typs, internalMemUsed, err
1782 }
1783 }
1784 
1785 schemaEnforcer.typs = typs
1786 schemaEnforcer.subsetEndIdx = len(typs)
1787 op := NewCaseOp(allocator, buffer, caseOps, elseOp, thenIdxs, caseOutputIdx, caseOutputType)
1788 internalMemUsed += op.(InternalMemoryOperator).InternalMemoryUsage()
1789 return op, caseOutputIdx, typs, internalMemUsed, err
1790 case *tree.AndExpr, *tree.OrExpr:
1791 return planLogicalProjectionOp(ctx, evalCtx, expr, columnTypes, input, acc, factory)
1792 default:
1793 return nil, resultIdx, nil, internalMemUsed, errors.Errorf("unhandled projection expression type: %s", reflect.TypeOf(t))
1794 }
1795 }
1796 
1797 func checkSupportedProjectionExpr(left, right tree.TypedExpr) error {
1798 leftTyp := left.ResolvedType()
1799 rightTyp := right.ResolvedType()
1800 if leftTyp.Equivalent(rightTyp) {
1801 return nil
1802 }
1803 
1804 // The types are not equivalent. Check if either is a type we'd like to avoid.
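// For example, `d + i` with d of type DATE and i of type INT8 has
// non-equivalent operand types, and the DATE operand makes the loop below
// reject the expression so that it falls back to the row-by-row engine.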
1805 for _, t := range []*types.T{leftTyp, rightTyp} {
1806 switch t.Family() {
1807 case types.DateFamily, types.TimestampFamily, types.TimestampTZFamily:
1808 return errors.New("dates and timestamp(tz) not supported in mixed-type expressions in the vectorized engine")
1809 }
1810 }
1811 return nil
1812 }
1813 
1814 func checkSupportedBinaryExpr(left, right tree.TypedExpr, outputType *types.T) error {
1815 leftDatumBacked := typeconv.TypeFamilyToCanonicalTypeFamily(left.ResolvedType().Family()) == typeconv.DatumVecCanonicalTypeFamily
1816 rightDatumBacked := typeconv.TypeFamilyToCanonicalTypeFamily(right.ResolvedType().Family()) == typeconv.DatumVecCanonicalTypeFamily
1817 outputDatumBacked := typeconv.TypeFamilyToCanonicalTypeFamily(outputType.Family()) == typeconv.DatumVecCanonicalTypeFamily
1818 if (leftDatumBacked || rightDatumBacked) && !outputDatumBacked {
1819 return errors.New("a binary expression with datum-backed arguments but " +
1820 "a non-datum-backed output is currently not supported")
1821 }
1822 return nil
1823 }
1824 
1825 func planProjectionExpr(
1826 ctx context.Context,
1827 evalCtx *tree.EvalContext,
1828 projOp tree.Operator,
1829 outputType *types.T,
1830 left, right tree.TypedExpr,
1831 columnTypes []*types.T,
1832 input colexecbase.Operator,
1833 acc *mon.BoundAccount,
1834 factory coldata.ColumnFactory,
1835 overloadHelper overloadHelper,
1836 ) (op colexecbase.Operator, resultIdx int, typs []*types.T, internalMemUsed int, err error) {
1837 if err := checkSupportedProjectionExpr(left, right); err != nil {
1838 return nil, resultIdx, typs, internalMemUsed, err
1839 }
1840 resultIdx = -1
1841 // actualOutputType tracks the logical type of the output column of the
1842 // projection operator. See the comment below for more details.
1843 actualOutputType := outputType
1844 if outputType.Identical(types.Int) {
1845 // Currently, the SQL type system does not respect the width of integers
1846 // when figuring out the type of the output of a projection expression
1847 // (for example, INT2 + INT2 will be typed as INT8); however,
1848 // vectorized operators do respect the width when both operands have
1849 // the same width. To work around this limitation, we explicitly
1850 // check whether the output type is INT8, and if so, we override the
1851 // output physical types to be what the vectorized projection operators
1852 // will actually output.
1853 //
1854 // Note that in mixed-width scenarios (i.e. INT2 + INT4) the vectorized
1855 // engine will output INT8, so no overriding is needed.
1856 //
1857 // We do, however, need to plan a cast to the expected logical type and
1858 // we will do that below.
1859 leftType := left.ResolvedType()
1860 rightType := right.ResolvedType()
1861 if leftType.Identical(types.Int2) && rightType.Identical(types.Int2) {
1862 actualOutputType = types.Int2
1863 } else if leftType.Identical(types.Int4) && rightType.Identical(types.Int4) {
1864 actualOutputType = types.Int4
1865 }
1866 }
1867 // There are 3 cases: either the left is constant, the right is constant,
1868 // or neither is constant.
1869 if lConstArg, lConst := left.(tree.Datum); lConst {
1870 // Case 1: the left is constant.
1871 // Normally, the optimizer normalizes binary exprs so that the constant
1872 // argument is on the right side. This doesn't happen for non-commutative
1873 // operators such as - and /, though, so we still need this case.
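// For example, in `1 - a` the constant stays on the left because `-` is
// not commutative, so the non-constant right operand is planned below and
// the projection uses GetProjectionLConstOperator.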
1874 var rightIdx int
1875 input, rightIdx, typs, internalMemUsed, err = planProjectionOperators(
1876 ctx, evalCtx, right, columnTypes, input, acc, factory,
1877 )
1878 if err != nil {
1879 return nil, resultIdx, typs, internalMemUsed, err
1880 }
1881 resultIdx = len(typs)
1882 // The projection result will be output to a new column that is appended
1883 // to the input batch.
1884 op, err = GetProjectionLConstOperator(
1885 colmem.NewAllocator(ctx, acc, factory), left.ResolvedType(), typs[rightIdx], actualOutputType,
1886 projOp, input, rightIdx, lConstArg, resultIdx, overloadHelper,
1887 )
1888 } else {
1889 var (
1890 leftIdx int
1891 internalMemUsedLeft int
1892 )
1893 input, leftIdx, typs, internalMemUsedLeft, err = planProjectionOperators(
1894 ctx, evalCtx, left, columnTypes, input, acc, factory,
1895 )
1896 if err != nil {
1897 return nil, resultIdx, typs, internalMemUsed, err
1898 }
1899 internalMemUsed += internalMemUsedLeft
1900 if rConstArg, rConst := right.(tree.Datum); rConst {
1901 // Case 2: the right is constant.
1902 // The projection result will be output to a new column that is appended
1903 // to the input batch.
1904 resultIdx = len(typs)
1905 if projOp == tree.Like || projOp == tree.NotLike {
1906 negate := projOp == tree.NotLike
1907 op, err = GetLikeProjectionOperator(
1908 colmem.NewAllocator(ctx, acc, factory), evalCtx, input, leftIdx, resultIdx,
1909 string(tree.MustBeDString(rConstArg)), negate,
1910 )
1911 } else if projOp == tree.In || projOp == tree.NotIn {
1912 negate := projOp == tree.NotIn
1913 datumTuple, ok := tree.AsDTuple(rConstArg)
1914 if !ok {
1915 err = errors.Errorf("IN is only supported for constant expressions")
1916 return nil, resultIdx, typs, internalMemUsed, err
1917 }
1918 op, err = GetInProjectionOperator(
1919 colmem.NewAllocator(ctx, acc, factory), typs[leftIdx], input, leftIdx,
1920 resultIdx, datumTuple, negate,
1921 )
1922 } else if projOp == tree.IsDistinctFrom || projOp == tree.IsNotDistinctFrom {
1923 if right != tree.DNull {
1924 err = errors.Errorf("IS DISTINCT FROM and IS NOT DISTINCT FROM are supported only with NULL argument")
1925 return nil, resultIdx, typs, internalMemUsed, err
1926 }
1927 // IS NULL is replaced with IS NOT DISTINCT FROM NULL, so we want to
1928 // negate when IS DISTINCT FROM is used.
1929 negate := projOp == tree.IsDistinctFrom
1930 op = newIsNullProjOp(colmem.NewAllocator(ctx, acc, factory), input, leftIdx, resultIdx, negate)
1931 } else {
1932 op, err = GetProjectionRConstOperator(
1933 colmem.NewAllocator(ctx, acc, factory), typs[leftIdx], right.ResolvedType(), actualOutputType,
1934 projOp, input, leftIdx, rConstArg, resultIdx, overloadHelper,
1935 )
1936 }
1937 } else {
1938 // Case 3: neither is constant.
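// For example, `a * b`: the left operand was planned above (leftIdx), the
// right operand is planned below (rightIdx), and the projection writes
// into a fresh column at resultIdx.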
1939 var ( 1940 rightIdx int 1941 internalMemUsedRight int 1942 ) 1943 input, rightIdx, typs, internalMemUsedRight, err = planProjectionOperators( 1944 ctx, evalCtx, right, typs, input, acc, factory, 1945 ) 1946 if err != nil { 1947 return nil, resultIdx, nil, internalMemUsed, err 1948 } 1949 internalMemUsed += internalMemUsedRight 1950 resultIdx = len(typs) 1951 op, err = GetProjectionOperator( 1952 colmem.NewAllocator(ctx, acc, factory), typs[leftIdx], typs[rightIdx], actualOutputType, 1953 projOp, input, leftIdx, rightIdx, resultIdx, overloadHelper, 1954 ) 1955 } 1956 } 1957 if err != nil { 1958 return op, resultIdx, typs, internalMemUsed, err 1959 } 1960 if sMem, ok := op.(InternalMemoryOperator); ok { 1961 internalMemUsed += sMem.InternalMemoryUsage() 1962 } 1963 typs = appendOneType(typs, actualOutputType) 1964 if !outputType.Identical(actualOutputType) { 1965 // The projection operator outputs a column of a different type than 1966 // the expected logical type. In order to "synchronize" the reality and 1967 // the expectations, we plan a cast. 1968 // 1969 // For example, INT2 + INT2 will be typed as INT8 by the SQL type 1970 // system, but we will plan a projection operator that outputs INT2, so 1971 // in such scenario we will have 1972 // actualOutputType = types.Int2 1973 // outputType = types.Int8 1974 // and will plan the corresponding cast. 1975 // 1976 // NOTE: this is *only* needed for integer types and should be removed 1977 // once #46940 is resolved. 1978 op, resultIdx, typs, err = 1979 planCastOperator(ctx, acc, typs, op, resultIdx, actualOutputType, outputType, factory) 1980 } 1981 return op, resultIdx, typs, internalMemUsed, err 1982 } 1983 1984 // planLogicalProjectionOp plans all the needed operators for a projection of 1985 // a logical operation (either AND or OR). 1986 func planLogicalProjectionOp( 1987 ctx context.Context, 1988 evalCtx *tree.EvalContext, 1989 expr tree.TypedExpr, 1990 columnTypes []*types.T, 1991 input colexecbase.Operator, 1992 acc *mon.BoundAccount, 1993 factory coldata.ColumnFactory, 1994 ) (op colexecbase.Operator, resultIdx int, typs []*types.T, internalMemUsed int, err error) { 1995 // Add a new boolean column that will store the result of the projection. 
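// For example, for `a AND b` the two argument chains are planned against
// feedOperators rather than against input directly: the AND projection
// operator constructed below pushes each input batch into those chains,
// which allows the right side to be evaluated only on the tuples that the
// left side has not already resolved.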
1996 resultIdx = len(columnTypes)
1997 typs = appendOneType(columnTypes, types.Bool)
1998 var (
1999 typedLeft, typedRight tree.TypedExpr
2000 leftProjOpChain, rightProjOpChain, outputOp colexecbase.Operator
2001 leftIdx, rightIdx int
2002 internalMemUsedLeft, internalMemUsedRight int
2003 leftFeedOp, rightFeedOp feedOperator
2004 )
2005 switch t := expr.(type) {
2006 case *tree.AndExpr:
2007 typedLeft = t.TypedLeft()
2008 typedRight = t.TypedRight()
2009 case *tree.OrExpr:
2010 typedLeft = t.TypedLeft()
2011 typedRight = t.TypedRight()
2012 default:
2013 colexecerror.InternalError(fmt.Sprintf("unexpected logical expression type %s", t.String()))
2014 }
2015 leftProjOpChain, leftIdx, typs, internalMemUsedLeft, err = planProjectionOperators(
2016 ctx, evalCtx, typedLeft, typs, &leftFeedOp, acc, factory,
2017 )
2018 if err != nil {
2019 return nil, resultIdx, typs, internalMemUsed, err
2020 }
2021 rightProjOpChain, rightIdx, typs, internalMemUsedRight, err = planProjectionOperators(
2022 ctx, evalCtx, typedRight, typs, &rightFeedOp, acc, factory,
2023 )
2024 if err != nil {
2025 return nil, resultIdx, typs, internalMemUsed, err
2026 }
2027 allocator := colmem.NewAllocator(ctx, acc, factory)
2028 input = newBatchSchemaSubsetEnforcer(allocator, input, typs, resultIdx, len(typs))
2029 switch expr.(type) {
2030 case *tree.AndExpr:
2031 outputOp = NewAndProjOp(
2032 allocator,
2033 input, leftProjOpChain, rightProjOpChain,
2034 &leftFeedOp, &rightFeedOp,
2035 leftIdx, rightIdx, resultIdx,
2036 )
2037 case *tree.OrExpr:
2038 outputOp = NewOrProjOp(
2039 allocator,
2040 input, leftProjOpChain, rightProjOpChain,
2041 &leftFeedOp, &rightFeedOp,
2042 leftIdx, rightIdx, resultIdx,
2043 )
2044 }
2045 return outputOp, resultIdx, typs, internalMemUsedLeft + internalMemUsedRight, nil
2046 }
2047 
2048 // planIsNullProjectionOp plans the operator for IS NULL and IS NOT NULL
2049 // expressions (tree.IsNullExpr and tree.IsNotNullExpr, respectively).
2050 func planIsNullProjectionOp(
2051 ctx context.Context,
2052 evalCtx *tree.EvalContext,
2053 outputType *types.T,
2054 expr tree.TypedExpr,
2055 columnTypes []*types.T,
2056 input colexecbase.Operator,
2057 acc *mon.BoundAccount,
2058 negate bool,
2059 factory coldata.ColumnFactory,
2060 ) (op colexecbase.Operator, resultIdx int, typs []*types.T, internalMemUsed int, err error) {
2061 op, resultIdx, typs, internalMemUsed, err = planProjectionOperators(
2062 ctx, evalCtx, expr, columnTypes, input, acc, factory,
2063 )
2064 outputIdx := len(typs)
2065 op = newIsNullProjOp(colmem.NewAllocator(ctx, acc, factory), op, resultIdx, outputIdx, negate)
2066 typs = appendOneType(typs, outputType)
2067 return op, outputIdx, typs, internalMemUsed, err
2068 }
2069 
2070 // appendOneType appends a *types.T to the end of a []*types.T. The size of the
2071 // underlying array of the resulting slice is 1 greater than that of the input
2072 // slice. This differs from the built-in append function, which can double the
2073 // capacity of the slice if its length is less than 1024, or increase it by 25% otherwise.
2074 func appendOneType(typs []*types.T, t *types.T) []*types.T {
2075 newTyps := make([]*types.T, len(typs)+1)
2076 copy(newTyps, typs)
2077 newTyps[len(newTyps)-1] = t
2078 return newTyps
2079 }
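// A usage sketch for appendOneType (hypothetical; the variable names are
// illustrative only, not part of this file). Because the result always gets
// a fresh backing array of exactly len+1 elements, two planning branches can
// extend the same base schema without clobbering each other, which the
// built-in append would not guarantee once the base slice has spare capacity:
//
//   base := []*types.T{types.Int}                // schema [INT8]
//   whenTyps := appendOneType(base, types.Bool)  // [INT8, BOOL]
//   elseTyps := appendOneType(base, types.Float) // [INT8, FLOAT8]
//   // whenTyps[1] is still types.Bool: the two results do not alias.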