github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/bulk_row_writer.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowexec

import (
	"context"
	"sync/atomic"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/row"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/errors"
)

// CTASPlanResultTypes are the result types for CTAS (CREATE TABLE ... AS)
// plans: a single Bytes column carrying the marshaled BulkOpSummary.
var CTASPlanResultTypes = []*types.T{
	types.Bytes, // rows
}

// bulkRowWriter is a processor that consumes rows from its input, converts
// them to KVs, and ingests them into the target table via a BulkAdder,
// emitting a single summary row when done.
type bulkRowWriter struct {
	execinfra.ProcessorBase
	flowCtx        *execinfra.FlowCtx
	processorID    int32
	batchIdxAtomic int64
	spec           execinfrapb.BulkRowWriterSpec
	input          execinfra.RowSource
	output         execinfra.RowReceiver
	summary        roachpb.BulkOpSummary
}

var _ execinfra.Processor = &bulkRowWriter{}
var _ execinfra.RowSource = &bulkRowWriter{}

func newBulkRowWriterProcessor(
	flowCtx *execinfra.FlowCtx,
	processorID int32,
	spec execinfrapb.BulkRowWriterSpec,
	input execinfra.RowSource,
	output execinfra.RowReceiver,
) (execinfra.Processor, error) {
	c := &bulkRowWriter{
		flowCtx:        flowCtx,
		processorID:    processorID,
		batchIdxAtomic: 0,
		spec:           spec,
		input:          input,
		output:         output,
	}
	if err := c.Init(
		c, &execinfrapb.PostProcessSpec{}, CTASPlanResultTypes, flowCtx, processorID, output,
		nil /* memMonitor */, execinfra.ProcStateOpts{InputsToDrain: []execinfra.RowSource{input}},
	); err != nil {
		return nil, err
	}
	return c, nil
}

// Start is part of the RowSource interface.
func (sp *bulkRowWriter) Start(ctx context.Context) context.Context {
	sp.input.Start(ctx)
	ctx = sp.StartInternal(ctx, "bulkRowWriter")
	err := sp.work(ctx)
	sp.MoveToDraining(err)
	return ctx
}

// Next is part of the RowSource interface.
func (sp *bulkRowWriter) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
	// If there wasn't an error while processing, output the summary.
	if sp.ProcessorBase.State == execinfra.StateRunning {
		countsBytes, marshalErr := protoutil.Marshal(&sp.summary)
		sp.MoveToDraining(marshalErr)
		if marshalErr == nil {
			// Output the summary.
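			// The single Bytes datum below holds the marshaled
			// roachpb.BulkOpSummary, matching CTASPlanResultTypes. A consumer
			// would decode it roughly as follows (an illustrative sketch, not
			// part of the original file):
			//
			//   var summary roachpb.BulkOpSummary
			//   raw := tree.MustBeDBytes(row[0].Datum)
			//   if err := protoutil.Unmarshal([]byte(raw), &summary); err != nil {
			//       return err
			//   }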
			return sqlbase.EncDatumRow{
				sqlbase.DatumToEncDatum(types.Bytes, tree.NewDBytes(tree.DBytes(countsBytes))),
			}, nil
		}
	}
	return nil, sp.DrainHelper()
}

// work drives the two halves of the pipeline: convertLoop turns input rows
// into KVs and ingestLoop writes them via the BulkAdder.
func (sp *bulkRowWriter) work(ctx context.Context) error {
	kvCh := make(chan row.KVBatch, 10)
	var g ctxgroup.Group

	conv, err := row.NewDatumRowConverter(ctx,
		&sp.spec.Table, nil /* targetColNames */, sp.EvalCtx, kvCh)
	if err != nil {
		return err
	}
	if conv.EvalCtx.SessionData == nil {
		panic("uninitialized session data")
	}

	g = ctxgroup.WithContext(ctx)
	g.GoCtx(func(ctx context.Context) error {
		return sp.ingestLoop(ctx, kvCh)
	})
	g.GoCtx(func(ctx context.Context) error {
		return sp.convertLoop(ctx, kvCh, conv)
	})
	return g.Wait()
}

// OutputTypes is part of the Processor interface.
func (sp *bulkRowWriter) OutputTypes() []*types.T {
	return CTASPlanResultTypes
}

// ingestLoop drains kvCh until it closes, ingesting the KVs with a BulkAdder
// and recording the resulting BulkOpSummary.
func (sp *bulkRowWriter) ingestLoop(ctx context.Context, kvCh chan row.KVBatch) error {
	writeTS := sp.spec.Table.CreateAsOfTime
	const bufferSize = 64 << 20
	adder, err := sp.flowCtx.Cfg.BulkAdder(
		ctx, sp.flowCtx.Cfg.DB, writeTS, kvserverbase.BulkAdderOptions{MinBufferSize: bufferSize},
	)
	if err != nil {
		return err
	}
	defer adder.Close(ctx)

	// ingestKvs drains kvs from the channel until it closes, ingesting them
	// using the BulkAdder. It handles the required buffering/sorting/etc.
	ingestKvs := func() error {
		for kvBatch := range kvCh {
			for _, kv := range kvBatch.KVs {
				if err := adder.Add(ctx, kv.Key, kv.Value.RawBytes); err != nil {
					if errors.HasType(err, (*kvserverbase.DuplicateKeyError)(nil)) {
						return errors.WithStack(err)
					}
					return err
				}
			}
		}

		if err := adder.Flush(ctx); err != nil {
			if errors.HasType(err, (*kvserverbase.DuplicateKeyError)(nil)) {
				return errors.WithStack(err)
			}
			return err
		}
		return nil
	}

	// Drain the kvCh using the BulkAdder until it closes.
	if err := ingestKvs(); err != nil {
		return err
	}

	sp.summary = adder.GetSummary()
	return nil
}

// convertLoop reads rows from the input, converts them to KVs via conv, and
// sends them in batches on kvCh, closing the channel once the input is
// exhausted.
func (sp *bulkRowWriter) convertLoop(
	ctx context.Context, kvCh chan row.KVBatch, conv *row.DatumRowConverter,
) error {
	defer close(kvCh)

	done := false
	alloc := &sqlbase.DatumAlloc{}
	typs := sp.input.OutputTypes()

	for {
		var rows int64
		for {
			row, meta := sp.input.Next()
			if meta != nil {
				if meta.Err != nil {
					return meta.Err
				}
				sp.AppendTrailingMeta(*meta)
				continue
			}
			if row == nil {
				done = true
				break
			}
			rows++

			for i, ed := range row {
				if ed.IsNull() {
					conv.Datums[i] = tree.DNull
					continue
				}
				if err := ed.EnsureDecoded(typs[i], alloc); err != nil {
					return err
				}
				conv.Datums[i] = ed.Datum
			}

			// `conv.Row` uses these as arguments to GenerateUniqueID to generate
			// hidden primary keys, when necessary. We want them to be ascending
			// per batch (to reduce overlap in the resulting kvs) and
			// non-conflicting (because of primary key uniqueness). The ids that
			// come out of GenerateUniqueID are sorted by (fileIndex, rowIndex)
			// and unique as long as the two inputs are a unique combo, so using
			// the processor ID and a monotonically increasing batch index should
			// do what we want.
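			// For illustration (hypothetical values, not part of the original
			// file): processor 0 generates IDs for (fileIndex=0, rowIndex=0,1,2,...)
			// and processor 1 for (fileIndex=1, rowIndex=0,1,2,...). The two
			// streams can never collide because fileIndex differs, and each
			// stream is ascending because the batch index only increases.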
			if err := conv.Row(ctx, sp.processorID, sp.batchIdxAtomic); err != nil {
				return err
			}
			atomic.AddInt64(&sp.batchIdxAtomic, 1)
		}
		if rows < 1 {
			break
		}

		if err := conv.SendBatch(ctx); err != nil {
			return err
		}

		if done {
			break
		}
	}

	return nil
}

// ConsumerClosed is part of the RowSource interface.
func (sp *bulkRowWriter) ConsumerClosed() {
	// The consumer is done, Next() will not be called again.
	sp.InternalClose()
}
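// Usage sketch (an assumption for illustration, not part of the original
// file): flow setup code constructs the processor via newBulkRowWriterProcessor
// and drives it like any other RowSource, along these lines:
//
//   proc, err := newBulkRowWriterProcessor(flowCtx, 0 /* processorID */, spec, input, output)
//   if err != nil {
//       return err
//   }
//   src := proc.(execinfra.RowSource) // bulkRowWriter also implements RowSource
//   ctx = src.Start(ctx)
//   for {
//       row, meta := src.Next()
//       if row == nil && meta == nil {
//           break
//       }
//       // The single emitted row holds the marshaled BulkOpSummary.
//   }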