github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/bulk_row_writer.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowexec

import (
	"context"
	"sync/atomic"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/row"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/errors"
)

// CTASPlanResultTypes is the list of result types for CTAS
// (CREATE TABLE ... AS ...) plans.
var CTASPlanResultTypes = []*types.T{
	types.Bytes, // rows
}

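// bulkRowWriter ingests the rows produced by its input into a target table
// using a BulkAdder; it backs CTAS (CREATE TABLE ... AS ...) execution. After
// ingestion completes, Next emits a single row holding the marshaled
// BulkOpSummary of what was written. A consumer can recover it roughly like
// this (a sketch; names like rowFromWriter are illustrative):
//
//	raw := tree.MustBeDBytes(rowFromWriter[0].Datum)
//	var summary roachpb.BulkOpSummary
//	if err := protoutil.Unmarshal([]byte(raw), &summary); err != nil {
//		return err
//	}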
type bulkRowWriter struct {
	execinfra.ProcessorBase
	flowCtx        *execinfra.FlowCtx
	processorID    int32
	batchIdxAtomic int64
	spec           execinfrapb.BulkRowWriterSpec
	input          execinfra.RowSource
	output         execinfra.RowReceiver
	summary        roachpb.BulkOpSummary
}

var _ execinfra.Processor = &bulkRowWriter{}
var _ execinfra.RowSource = &bulkRowWriter{}

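// newBulkRowWriterProcessor constructs a bulkRowWriter and initializes its
// embedded ProcessorBase, registering the input so that it is drained on
// error or early exit.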
func newBulkRowWriterProcessor(
	flowCtx *execinfra.FlowCtx,
	processorID int32,
	spec execinfrapb.BulkRowWriterSpec,
	input execinfra.RowSource,
	output execinfra.RowReceiver,
) (execinfra.Processor, error) {
	c := &bulkRowWriter{
		flowCtx:        flowCtx,
		processorID:    processorID,
		batchIdxAtomic: 0,
		spec:           spec,
		input:          input,
		output:         output,
	}
	if err := c.Init(
		c, &execinfrapb.PostProcessSpec{}, CTASPlanResultTypes, flowCtx, processorID, output,
		nil /* memMonitor */, execinfra.ProcStateOpts{InputsToDrain: []execinfra.RowSource{input}},
	); err != nil {
		return nil, err
	}
	return c, nil
}

// Start is part of the RowSource interface. Note that all of the ingestion
// work happens synchronously here, inside work(): by the time Start returns,
// the writes have either completed or failed, and Next only needs to report
// the summary.
func (sp *bulkRowWriter) Start(ctx context.Context) context.Context {
	sp.input.Start(ctx)
	ctx = sp.StartInternal(ctx, "bulkRowWriter")
	err := sp.work(ctx)
	sp.MoveToDraining(err)
	return ctx
}

// Next is part of the RowSource interface.
func (sp *bulkRowWriter) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
	// If there wasn't an error while processing, output the summary.
	if sp.ProcessorBase.State == execinfra.StateRunning {
		countsBytes, marshalErr := protoutil.Marshal(&sp.summary)
		sp.MoveToDraining(marshalErr)
		if marshalErr == nil {
			// Output the summary.
			return sqlbase.EncDatumRow{
				sqlbase.DatumToEncDatum(types.Bytes, tree.NewDBytes(tree.DBytes(countsBytes))),
			}, nil
		}
	}
	return nil, sp.DrainHelper()
}

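// work runs the two halves of the writer concurrently: convertLoop turns
// input rows into KVs and sends them on kvCh, while ingestLoop drains kvCh
// into a BulkAdder. It returns once both goroutines have finished.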
func (sp *bulkRowWriter) work(ctx context.Context) error {
	kvCh := make(chan row.KVBatch, 10)
	var g ctxgroup.Group

	conv, err := row.NewDatumRowConverter(ctx,
		&sp.spec.Table, nil /* targetColNames */, sp.EvalCtx, kvCh)
	if err != nil {
		return err
	}
	if conv.EvalCtx.SessionData == nil {
		panic("uninitialized session data")
	}

	g = ctxgroup.WithContext(ctx)
	g.GoCtx(func(ctx context.Context) error {
		return sp.ingestLoop(ctx, kvCh)
	})
	g.GoCtx(func(ctx context.Context) error {
		return sp.convertLoop(ctx, kvCh, conv)
	})
	return g.Wait()
}

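// OutputTypes is part of the execinfra.Processor interface.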
func (sp *bulkRowWriter) OutputTypes() []*types.T {
	return CTASPlanResultTypes
}

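// ingestLoop drains kvCh into a BulkAdder that writes at the table's
// CreateAsOfTime, flushes the adder once the channel closes, and records the
// resulting BulkOpSummary on sp.summary.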
func (sp *bulkRowWriter) ingestLoop(ctx context.Context, kvCh chan row.KVBatch) error {
	writeTS := sp.spec.Table.CreateAsOfTime
	const bufferSize = 64 << 20
	adder, err := sp.flowCtx.Cfg.BulkAdder(
		ctx, sp.flowCtx.Cfg.DB, writeTS, kvserverbase.BulkAdderOptions{MinBufferSize: bufferSize},
	)
	if err != nil {
		return err
	}
	defer adder.Close(ctx)

	// ingestKvs drains kvs from the channel until it closes, ingesting them using
	// the BulkAdder. It handles the required buffering/sorting/etc.
	ingestKvs := func() error {
		for kvBatch := range kvCh {
			for _, kv := range kvBatch.KVs {
				if err := adder.Add(ctx, kv.Key, kv.Value.RawBytes); err != nil {
					if errors.HasType(err, (*kvserverbase.DuplicateKeyError)(nil)) {
						return errors.WithStack(err)
					}
					return err
				}
			}
		}

		if err := adder.Flush(ctx); err != nil {
			if errors.HasType(err, (*kvserverbase.DuplicateKeyError)(nil)) {
				return errors.WithStack(err)
			}
			return err
		}
		return nil
	}

	// Drain the kvCh using the BulkAdder until it closes.
	if err := ingestKvs(); err != nil {
		return err
	}

	sp.summary = adder.GetSummary()
	return nil
}

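// convertLoop reads rows from the processor's input, decodes them into
// datums, and runs them through the DatumRowConverter, which emits KV batches
// on kvCh. It closes kvCh when the input is exhausted so that ingestLoop can
// flush and exit.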
func (sp *bulkRowWriter) convertLoop(
	ctx context.Context, kvCh chan row.KVBatch, conv *row.DatumRowConverter,
) error {
	defer close(kvCh)

	done := false
	alloc := &sqlbase.DatumAlloc{}
	typs := sp.input.OutputTypes()

	for {
		var rows int64
		for {
			row, meta := sp.input.Next()
			if meta != nil {
				if meta.Err != nil {
					return meta.Err
				}
				sp.AppendTrailingMeta(*meta)
				continue
			}
			if row == nil {
				done = true
				break
			}
			rows++

			for i, ed := range row {
				if ed.IsNull() {
					conv.Datums[i] = tree.DNull
					continue
				}
				if err := ed.EnsureDecoded(typs[i], alloc); err != nil {
					return err
				}
				conv.Datums[i] = ed.Datum
			}

			// `conv.Row` uses these as arguments to GenerateUniqueID to generate
			// hidden primary keys, when necessary. We want them to be ascending (to
			// reduce overlap in the resulting kvs) and non-conflicting (because of
			// primary key uniqueness). The ids that come out of GenerateUniqueID
			// are sorted by (fileIndex, rowIndex) and unique as long as the two
			// inputs are a unique combo, so using the processor ID and a
			// monotonically increasing batch index should do what we want.
			// Load the batch index atomically to match the atomic increment below.
			if err := conv.Row(ctx, sp.processorID, atomic.LoadInt64(&sp.batchIdxAtomic)); err != nil {
				return err
			}
			atomic.AddInt64(&sp.batchIdxAtomic, 1)
		}
		if rows < 1 {
			break
		}

		if err := conv.SendBatch(ctx); err != nil {
			return err
		}

		if done {
			break
		}
	}

	return nil
}

// ConsumerClosed is part of the RowSource interface.
func (sp *bulkRowWriter) ConsumerClosed() {
	// The consumer is done, Next() will not be called again.
	sp.InternalClose()
}