github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/columnarizer.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "context" 15 "fmt" 16 17 "github.com/cockroachdb/cockroach/pkg/col/coldata" 18 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 19 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 20 "github.com/cockroachdb/cockroach/pkg/sql/colmem" 21 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 22 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 23 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 24 "github.com/cockroachdb/cockroach/pkg/sql/types" 25 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 26 ) 27 28 // Columnarizer turns an execinfra.RowSource input into an Operator output, by 29 // reading the input in chunks of size coldata.BatchSize() and converting each 30 // chunk into a coldata.Batch column by column. 31 type Columnarizer struct { 32 execinfra.ProcessorBase 33 NonExplainable 34 35 // mu is used to protect against concurrent DrainMeta and Next calls, which 36 // are currently allowed. 37 // TODO(asubiotto): Explore calling DrainMeta from the same goroutine as Next, 38 // which will simplify this model. 39 mu syncutil.Mutex 40 41 allocator *colmem.Allocator 42 input execinfra.RowSource 43 da sqlbase.DatumAlloc 44 initStatus OperatorInitStatus 45 46 buffered sqlbase.EncDatumRows 47 batch coldata.Batch 48 accumulatedMeta []execinfrapb.ProducerMetadata 49 ctx context.Context 50 typs []*types.T 51 } 52 53 var _ colexecbase.Operator = &Columnarizer{} 54 55 // NewColumnarizer returns a new Columnarizer. 56 func NewColumnarizer( 57 ctx context.Context, 58 allocator *colmem.Allocator, 59 flowCtx *execinfra.FlowCtx, 60 processorID int32, 61 input execinfra.RowSource, 62 ) (*Columnarizer, error) { 63 var err error 64 c := &Columnarizer{ 65 allocator: allocator, 66 input: input, 67 ctx: ctx, 68 } 69 if err = c.ProcessorBase.Init( 70 nil, 71 &execinfrapb.PostProcessSpec{}, 72 input.OutputTypes(), 73 flowCtx, 74 processorID, 75 nil, /* output */ 76 nil, /* memMonitor */ 77 execinfra.ProcStateOpts{InputsToDrain: []execinfra.RowSource{input}}, 78 ); err != nil { 79 return nil, err 80 } 81 c.typs = c.OutputTypes() 82 return c, nil 83 } 84 85 // Init is part of the Operator interface. 86 func (c *Columnarizer) Init() { 87 // We don't want to call Start on the input to columnarizer and allocating 88 // internal objects several times if Init method is called more than once, so 89 // we have this check in place. 90 if c.initStatus == OperatorNotInitialized { 91 c.batch = c.allocator.NewMemBatch(c.typs) 92 c.buffered = make(sqlbase.EncDatumRows, coldata.BatchSize()) 93 for i := range c.buffered { 94 c.buffered[i] = make(sqlbase.EncDatumRow, len(c.typs)) 95 } 96 c.accumulatedMeta = make([]execinfrapb.ProducerMetadata, 0, 1) 97 c.input.Start(c.ctx) 98 c.initStatus = OperatorInitialized 99 } 100 } 101 102 // Next is part of the Operator interface. 103 func (c *Columnarizer) Next(context.Context) coldata.Batch { 104 c.mu.Lock() 105 defer c.mu.Unlock() 106 c.batch.ResetInternalBatch() 107 // Buffer up n rows. 108 nRows := 0 109 columnTypes := c.OutputTypes() 110 for ; nRows < coldata.BatchSize(); nRows++ { 111 row, meta := c.input.Next() 112 if meta != nil { 113 c.accumulatedMeta = append(c.accumulatedMeta, *meta) 114 nRows-- 115 continue 116 } 117 if row == nil { 118 break 119 } 120 // TODO(jordan): evaluate whether it's more efficient to skip the buffer 121 // phase. 122 copy(c.buffered[nRows], row) 123 } 124 125 // Write each column into the output batch. 126 for idx, ct := range columnTypes { 127 err := EncDatumRowsToColVec(c.allocator, c.buffered[:nRows], c.batch.ColVec(idx), idx, ct, &c.da) 128 if err != nil { 129 colexecerror.InternalError(err) 130 } 131 } 132 c.batch.SetLength(nRows) 133 return c.batch 134 } 135 136 // Run is part of the execinfra.Processor interface. 137 // 138 // Columnarizers are not expected to be Run, so we prohibit calling this method 139 // on them. 140 func (c *Columnarizer) Run(context.Context) { 141 colexecerror.InternalError("Columnarizer should not be Run") 142 } 143 144 var _ colexecbase.Operator = &Columnarizer{} 145 var _ execinfrapb.MetadataSource = &Columnarizer{} 146 147 // DrainMeta is part of the MetadataSource interface. 148 func (c *Columnarizer) DrainMeta(ctx context.Context) []execinfrapb.ProducerMetadata { 149 c.mu.Lock() 150 defer c.mu.Unlock() 151 c.MoveToDraining(nil /* err */) 152 for { 153 meta := c.DrainHelper() 154 if meta == nil { 155 break 156 } 157 c.accumulatedMeta = append(c.accumulatedMeta, *meta) 158 } 159 return c.accumulatedMeta 160 } 161 162 // ChildCount is part of the Operator interface. 163 func (c *Columnarizer) ChildCount(verbose bool) int { 164 if _, ok := c.input.(execinfra.OpNode); ok { 165 return 1 166 } 167 return 0 168 } 169 170 // Child is part of the Operator interface. 171 func (c *Columnarizer) Child(nth int, verbose bool) execinfra.OpNode { 172 if nth == 0 { 173 if n, ok := c.input.(execinfra.OpNode); ok { 174 return n 175 } 176 colexecerror.InternalError("input to Columnarizer is not an execinfra.OpNode") 177 } 178 colexecerror.InternalError(fmt.Sprintf("invalid index %d", nth)) 179 // This code is unreachable, but the compiler cannot infer that. 180 return nil 181 }