github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/columnarizer.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    25  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    26  )
    27  
// Columnarizer turns an execinfra.RowSource input into an Operator output, by
// reading the input in chunks of size coldata.BatchSize() and converting each
// chunk into a coldata.Batch column by column.
type Columnarizer struct {
	execinfra.ProcessorBase
	NonExplainable

	// mu is used to protect against concurrent DrainMeta and Next calls, which
	// are currently allowed.
	// TODO(asubiotto): Explore calling DrainMeta from the same goroutine as Next,
	//  which will simplify this model.
	mu syncutil.Mutex

	// allocator creates the output batch in Init and is handed to
	// EncDatumRowsToColVec in Next. input is the row source being converted; it
	// is started in Init. da is the datum allocator passed to
	// EncDatumRowsToColVec. initStatus records whether Init has already run so
	// that repeated Init calls are no-ops.
	allocator  *colmem.Allocator
	input      execinfra.RowSource
	da         sqlbase.DatumAlloc
	initStatus OperatorInitStatus

	// buffered holds the rows copied from input during a Next call; Init sizes
	// it to coldata.BatchSize() rows of len(typs) datums each. batch is the
	// output batch, which is reset and refilled on every Next call.
	// accumulatedMeta collects the metadata received from input in Next and
	// from DrainHelper in DrainMeta, which returns it. ctx is the context given
	// to NewColumnarizer and used to start input. typs holds the output types
	// captured in NewColumnarizer.
	buffered        sqlbase.EncDatumRows
	batch           coldata.Batch
	accumulatedMeta []execinfrapb.ProducerMetadata
	ctx             context.Context
	typs            []*types.T
}
    52  
    53  var _ colexecbase.Operator = &Columnarizer{}
    54  
    55  // NewColumnarizer returns a new Columnarizer.
    56  func NewColumnarizer(
    57  	ctx context.Context,
    58  	allocator *colmem.Allocator,
    59  	flowCtx *execinfra.FlowCtx,
    60  	processorID int32,
    61  	input execinfra.RowSource,
    62  ) (*Columnarizer, error) {
    63  	var err error
    64  	c := &Columnarizer{
    65  		allocator: allocator,
    66  		input:     input,
    67  		ctx:       ctx,
    68  	}
    69  	if err = c.ProcessorBase.Init(
    70  		nil,
    71  		&execinfrapb.PostProcessSpec{},
    72  		input.OutputTypes(),
    73  		flowCtx,
    74  		processorID,
    75  		nil, /* output */
    76  		nil, /* memMonitor */
    77  		execinfra.ProcStateOpts{InputsToDrain: []execinfra.RowSource{input}},
    78  	); err != nil {
    79  		return nil, err
    80  	}
    81  	c.typs = c.OutputTypes()
    82  	return c, nil
    83  }
    84  
    85  // Init is part of the Operator interface.
    86  func (c *Columnarizer) Init() {
    87  	// We don't want to call Start on the input to columnarizer and allocating
    88  	// internal objects several times if Init method is called more than once, so
    89  	// we have this check in place.
    90  	if c.initStatus == OperatorNotInitialized {
    91  		c.batch = c.allocator.NewMemBatch(c.typs)
    92  		c.buffered = make(sqlbase.EncDatumRows, coldata.BatchSize())
    93  		for i := range c.buffered {
    94  			c.buffered[i] = make(sqlbase.EncDatumRow, len(c.typs))
    95  		}
    96  		c.accumulatedMeta = make([]execinfrapb.ProducerMetadata, 0, 1)
    97  		c.input.Start(c.ctx)
    98  		c.initStatus = OperatorInitialized
    99  	}
   100  }
   101  
   102  // Next is part of the Operator interface.
   103  func (c *Columnarizer) Next(context.Context) coldata.Batch {
   104  	c.mu.Lock()
   105  	defer c.mu.Unlock()
   106  	c.batch.ResetInternalBatch()
   107  	// Buffer up n rows.
   108  	nRows := 0
   109  	columnTypes := c.OutputTypes()
   110  	for ; nRows < coldata.BatchSize(); nRows++ {
   111  		row, meta := c.input.Next()
   112  		if meta != nil {
   113  			c.accumulatedMeta = append(c.accumulatedMeta, *meta)
   114  			nRows--
   115  			continue
   116  		}
   117  		if row == nil {
   118  			break
   119  		}
   120  		// TODO(jordan): evaluate whether it's more efficient to skip the buffer
   121  		// phase.
   122  		copy(c.buffered[nRows], row)
   123  	}
   124  
   125  	// Write each column into the output batch.
   126  	for idx, ct := range columnTypes {
   127  		err := EncDatumRowsToColVec(c.allocator, c.buffered[:nRows], c.batch.ColVec(idx), idx, ct, &c.da)
   128  		if err != nil {
   129  			colexecerror.InternalError(err)
   130  		}
   131  	}
   132  	c.batch.SetLength(nRows)
   133  	return c.batch
   134  }
   135  
   136  // Run is part of the execinfra.Processor interface.
   137  //
   138  // Columnarizers are not expected to be Run, so we prohibit calling this method
   139  // on them.
   140  func (c *Columnarizer) Run(context.Context) {
   141  	colexecerror.InternalError("Columnarizer should not be Run")
   142  }
   143  
   144  var _ colexecbase.Operator = &Columnarizer{}
   145  var _ execinfrapb.MetadataSource = &Columnarizer{}
   146  
   147  // DrainMeta is part of the MetadataSource interface.
   148  func (c *Columnarizer) DrainMeta(ctx context.Context) []execinfrapb.ProducerMetadata {
   149  	c.mu.Lock()
   150  	defer c.mu.Unlock()
   151  	c.MoveToDraining(nil /* err */)
   152  	for {
   153  		meta := c.DrainHelper()
   154  		if meta == nil {
   155  			break
   156  		}
   157  		c.accumulatedMeta = append(c.accumulatedMeta, *meta)
   158  	}
   159  	return c.accumulatedMeta
   160  }
   161  
   162  // ChildCount is part of the Operator interface.
   163  func (c *Columnarizer) ChildCount(verbose bool) int {
   164  	if _, ok := c.input.(execinfra.OpNode); ok {
   165  		return 1
   166  	}
   167  	return 0
   168  }
   169  
   170  // Child is part of the Operator interface.
   171  func (c *Columnarizer) Child(nth int, verbose bool) execinfra.OpNode {
   172  	if nth == 0 {
   173  		if n, ok := c.input.(execinfra.OpNode); ok {
   174  			return n
   175  		}
   176  		colexecerror.InternalError("input to Columnarizer is not an execinfra.OpNode")
   177  	}
   178  	colexecerror.InternalError(fmt.Sprintf("invalid index %d", nth))
   179  	// This code is unreachable, but the compiler cannot infer that.
   180  	return nil
   181  }