github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/import_processor.go

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package importccl
    10  
    11  import (
    12  	"context"
    13  	"math"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/ccl/storageccl"
    18  	"github.com/cockroachdb/cockroach/pkg/keys"
    19  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    20  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/row"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    28  	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
    29  	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
    30  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    31  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    32  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    33  	"github.com/cockroachdb/errors"
    34  )
    35  
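// csvOutputTypes is the schema of the single row emitted by the import
// processor: a protobuf-encoded roachpb.BulkOpSummary and a second BYTES
// column that is currently left empty.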
    36  var csvOutputTypes = []*types.T{
    37  	types.Bytes,
    38  	types.Bytes,
    39  }
    40  
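// readImportDataProcessor converts external input files into KVs and ingests
// them via bulk adders, streaming per-file progress metadata to its output
// while the import runs.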
    41  type readImportDataProcessor struct {
    42  	flowCtx *execinfra.FlowCtx
    43  	spec    execinfrapb.ReadImportDataSpec
    44  	output  execinfra.RowReceiver
    45  }
    46  
    47  var _ execinfra.Processor = &readImportDataProcessor{}
    48  
    49  func (cp *readImportDataProcessor) OutputTypes() []*types.T {
    50  	return csvOutputTypes
    51  }
    52  
    53  func newReadImportDataProcessor(
    54  	flowCtx *execinfra.FlowCtx,
    55  	processorID int32,
    56  	spec execinfrapb.ReadImportDataSpec,
    57  	output execinfra.RowReceiver,
    58  ) (execinfra.Processor, error) {
    59  	cp := &readImportDataProcessor{
    60  		flowCtx: flowCtx,
    61  		spec:    spec,
    62  		output:  output,
    63  	}
    64  	return cp, nil
    65  }
    66  
    67  func (cp *readImportDataProcessor) Run(ctx context.Context) {
    68  	ctx, span := tracing.ChildSpan(ctx, "readImportDataProcessor")
    69  	defer tracing.FinishSpan(span)
    70  	defer cp.output.ProducerDone()
    71  
    72  	progCh := make(chan execinfrapb.RemoteProducerMetadata_BulkProcessorProgress)
    73  
    74  	var summary *roachpb.BulkOpSummary
    75  	var err error
    76  	// We don't have to worry about this goroutine leaking: the loop below ranges
    77  	// over progCh, which is closed only after the goroutine returns.
    78  	go func() {
    79  		defer close(progCh)
    80  		summary, err = runImport(ctx, cp.flowCtx, &cp.spec, progCh)
    81  	}()
    82  
    83  	for prog := range progCh {
    84  		// Take a copy so that the address we push to the output stays stable across iterations.
    85  		p := prog
    86  		cp.output.Push(nil, &execinfrapb.ProducerMetadata{BulkProcessorProgress: &p})
    87  	}
    88  
    89  	if err != nil {
    90  		cp.output.Push(nil, &execinfrapb.ProducerMetadata{Err: err})
    91  		return
    92  	}
    93  
    94  	// Once the import is done, send back to the controller the serialized
    95  	// summary of the import operation. For more info see roachpb.BulkOpSummary.
    96  	countsBytes, err := protoutil.Marshal(summary)
    97  	if err != nil {
    98  		cp.output.Push(nil, &execinfrapb.ProducerMetadata{Err: err})
    99  		return
   100  	}
   101  	cp.output.Push(sqlbase.EncDatumRow{
   102  		sqlbase.DatumToEncDatum(types.Bytes, tree.NewDBytes(tree.DBytes(countsBytes))),
   103  		sqlbase.DatumToEncDatum(types.Bytes, tree.NewDBytes(tree.DBytes([]byte{}))),
   104  	}, nil)
   105  }
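
// The final row pushed by Run above carries the import's roachpb.BulkOpSummary
// as a protobuf-encoded BYTES datum. The helper below is a hypothetical sketch
// (it is not referenced elsewhere in this package) of how such a payload could
// be decoded back into a summary with protoutil.
func decodeBulkOpSummary(countsBytes []byte) (*roachpb.BulkOpSummary, error) {
	summary := &roachpb.BulkOpSummary{}
	if err := protoutil.Unmarshal(countsBytes, summary); err != nil {
		return nil, errors.Wrap(err, "decoding BulkOpSummary")
	}
	return summary, nil
}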
   106  
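// makeInputConverter returns the inputConverter matching spec.Format, wiring
// the converted KVs into kvCh.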
   107  func makeInputConverter(
   108  	ctx context.Context,
   109  	spec *execinfrapb.ReadImportDataSpec,
   110  	evalCtx *tree.EvalContext,
   111  	kvCh chan row.KVBatch,
   112  ) (inputConverter, error) {
   113  
   114  	var singleTable *sqlbase.TableDescriptor
   115  	var singleTableTargetCols tree.NameList
   116  	if len(spec.Tables) == 1 {
   117  		for _, table := range spec.Tables {
   118  			singleTable = table.Desc
   119  			singleTableTargetCols = make(tree.NameList, len(table.TargetCols))
   120  			for i, colName := range table.TargetCols {
   121  				singleTableTargetCols[i] = tree.Name(colName)
   122  			}
   123  		}
   124  	}
   125  
   126  	if format := spec.Format.Format; singleTable == nil && !isMultiTableFormat(format) {
   127  		return nil, errors.Errorf("%s only supports reading a single, pre-specified table", format.String())
   128  	}
   129  
   130  	switch spec.Format.Format {
   131  	case roachpb.IOFileFormat_CSV:
   132  		isWorkload := true
   133  		for _, file := range spec.Uri {
   134  			if conf, err := cloud.ExternalStorageConfFromURI(file); err != nil || conf.Provider != roachpb.ExternalStorageProvider_Workload {
   135  				isWorkload = false
   136  				break
   137  			}
   138  		}
   139  		if isWorkload {
   140  			return newWorkloadReader(kvCh, singleTable, evalCtx), nil
   141  		}
   142  		return newCSVInputReader(
   143  			kvCh, spec.Format.Csv, spec.WalltimeNanos, int(spec.ReaderParallelism),
   144  			singleTable, singleTableTargetCols, evalCtx), nil
   145  	case roachpb.IOFileFormat_MysqlOutfile:
   146  		return newMysqloutfileReader(
   147  			spec.Format.MysqlOut, kvCh, spec.WalltimeNanos, int(spec.ReaderParallelism), singleTable, evalCtx)
   148  	case roachpb.IOFileFormat_Mysqldump:
   149  		return newMysqldumpReader(ctx, kvCh, spec.Tables, evalCtx)
   150  	case roachpb.IOFileFormat_PgCopy:
   151  		return newPgCopyReader(ctx, kvCh, spec.Format.PgCopy, singleTable, evalCtx)
   152  	case roachpb.IOFileFormat_PgDump:
   153  		return newPgDumpReader(ctx, kvCh, spec.Format.PgDump, spec.Tables, evalCtx)
   154  	case roachpb.IOFileFormat_Avro:
   155  		return newAvroInputReader(
   156  			kvCh, singleTable, spec.Format.Avro, spec.WalltimeNanos,
   157  			int(spec.ReaderParallelism), evalCtx)
   158  	default:
   159  		return nil, errors.Errorf(
   160  			"Requested IMPORT format (%d) not supported by this node", spec.Format.Format)
   161  	}
   162  }
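
// In the CSV case above, the input is handed to the workload reader only when
// every URI resolves to the Workload external-storage provider. The function
// below is an illustrative, hypothetical restatement of that per-URI check; it
// is not called by this package.
func uriUsesWorkloadProvider(uri string) bool {
	conf, err := cloud.ExternalStorageConfFromURI(uri)
	return err == nil && conf.Provider == roachpb.ExternalStorageProvider_Workload
}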
   163  
   164  // ingestKvs drains kvs from the channel until it closes, ingesting them using
   165  // the BulkAdder. It handles the required buffering/sorting/etc.
   166  func ingestKvs(
   167  	ctx context.Context,
   168  	flowCtx *execinfra.FlowCtx,
   169  	spec *execinfrapb.ReadImportDataSpec,
   170  	progCh chan execinfrapb.RemoteProducerMetadata_BulkProcessorProgress,
   171  	kvCh <-chan row.KVBatch,
   172  ) (*roachpb.BulkOpSummary, error) {
   173  	ctx, span := tracing.ChildSpan(ctx, "ingestKVs")
   174  	defer tracing.FinishSpan(span)
   175  
   176  	writeTS := hlc.Timestamp{WallTime: spec.WalltimeNanos}
   177  
   178  	flushSize := func() int64 { return storageccl.MaxImportBatchSize(flowCtx.Cfg.Settings) }
   179  
   180  	// We create two bulk adders so as to combat the excessive flushing of small
   181  	// SSTs which was observed when using a single adder for both primary and
   182  	// secondary index kvs. The number of secondary index kvs is small, and so we
   183  	// expect the indexAdder to flush much less frequently than the pkIndexAdder.
   184  	//
   185  	// It is highly recommended that the cluster setting controlling the max size
   186  	// of the pkIndexAdder buffer be set below that of the indexAdder buffer.
   187  	// Otherwise, as a consequence of filling up faster, the pkIndexAdder buffer
   188  	// will hog memory as it tries to grow more aggressively.
   189  	minBufferSize, maxBufferSize, stepSize := storageccl.ImportBufferConfigSizes(flowCtx.Cfg.Settings, true /* isPKAdder */)
   190  	pkIndexAdder, err := flowCtx.Cfg.BulkAdder(ctx, flowCtx.Cfg.DB, writeTS, kvserverbase.BulkAdderOptions{
   191  		Name:              "pkAdder",
   192  		DisallowShadowing: true,
   193  		SkipDuplicates:    true,
   194  		MinBufferSize:     minBufferSize,
   195  		MaxBufferSize:     maxBufferSize,
   196  		StepBufferSize:    stepSize,
   197  		SSTSize:           flushSize,
   198  	})
   199  	if err != nil {
   200  		return nil, err
   201  	}
   202  	defer pkIndexAdder.Close(ctx)
   203  
   204  	minBufferSize, maxBufferSize, stepSize = storageccl.ImportBufferConfigSizes(flowCtx.Cfg.Settings, false /* isPKAdder */)
   205  	indexAdder, err := flowCtx.Cfg.BulkAdder(ctx, flowCtx.Cfg.DB, writeTS, kvserverbase.BulkAdderOptions{
   206  		Name:              "indexAdder",
   207  		DisallowShadowing: true,
   208  		SkipDuplicates:    true,
   209  		MinBufferSize:     minBufferSize,
   210  		MaxBufferSize:     maxBufferSize,
   211  		StepBufferSize:    stepSize,
   212  		SSTSize:           flushSize,
   213  	})
   214  	if err != nil {
   215  		return nil, err
   216  	}
   217  	defer indexAdder.Close(ctx)
   218  
   219  	// Set up progress tracking:
   220  	//  - offsets maps source file IDs to offsets in the slices below.
   221  	//  - writtenRow contains the LastRow of the batch most recently added to the buffer.
   222  	//  - writtenFraction contains the % of the input finished as of the last batch.
   223  	//  - pkFlushedRow contains `writtenRow` as of the last pk adder flush.
   224  	//  - idxFlushedRow contains `writtenRow` as of the last index adder flush.
   225  	// The pkFlushedRow, idxFlushedRow and writtenFraction values are written via
   226  	// `atomic` so the progress-reporting goroutine can read them (see the sketch below).
   227  	writtenRow := make([]int64, len(spec.Uri))
   228  	writtenFraction := make([]uint32, len(spec.Uri))
   229  
   230  	pkFlushedRow := make([]int64, len(spec.Uri))
   231  	idxFlushedRow := make([]int64, len(spec.Uri))
   232  
   233  	// When the PK adder flushes, everything written has been flushed, so we set
   234  	// pkFlushedRow to writtenRow. Additionally if the indexAdder is empty then we
   235  	// can treat it as flushed as well (in case we're not adding anything to it).
   236  	pkIndexAdder.SetOnFlush(func() {
   237  		for i, emitted := range writtenRow {
   238  			atomic.StoreInt64(&pkFlushedRow[i], emitted)
   239  		}
   240  		if indexAdder.IsEmpty() {
   241  			for i, emitted := range writtenRow {
   242  				atomic.StoreInt64(&idxFlushedRow[i], emitted)
   243  			}
   244  		}
   245  	})
   246  	indexAdder.SetOnFlush(func() {
   247  		for i, emitted := range writtenRow {
   248  			atomic.StoreInt64(&idxFlushedRow[i], emitted)
   249  		}
   250  	})
   251  
   252  	// offsets maps input file ID to a slot in our progress tracking slices.
   253  	offsets := make(map[int32]int, len(spec.Uri))
   254  	var offset int
   255  	for i := range spec.Uri {
   256  		offsets[i] = offset
   257  		offset++
   258  	}
   259  
   260  	pushProgress := func() {
   261  		var prog execinfrapb.RemoteProducerMetadata_BulkProcessorProgress
   262  		prog.ResumePos = make(map[int32]int64)
   263  		prog.CompletedFraction = make(map[int32]float32)
   264  		for file, offset := range offsets {
   265  			pk := atomic.LoadInt64(&pkFlushedRow[offset])
   266  			idx := atomic.LoadInt64(&idxFlushedRow[offset])
   267  			// On resume we'll be able to skip up to the last row for which both the
   268  			// PK and index adders have flushed KVs.
   269  			if idx > pk {
   270  				prog.ResumePos[file] = pk
   271  			} else {
   272  				prog.ResumePos[file] = idx
   273  			}
   274  			prog.CompletedFraction[file] = math.Float32frombits(atomic.LoadUint32(&writtenFraction[offset]))
   275  		}
   276  		progCh <- prog
   277  	}
   278  
   279  	// stopProgress will be closed when there is no more progress to report.
   280  	stopProgress := make(chan struct{})
   281  	g := ctxgroup.WithContext(ctx)
   282  	g.GoCtx(func(ctx context.Context) error {
   283  		tick := time.NewTicker(time.Second * 10)
   284  		defer tick.Stop()
   285  		done := ctx.Done()
   286  		for {
   287  			select {
   288  			case <-done:
   289  				return ctx.Err()
   290  			case <-stopProgress:
   291  				return nil
   292  			case <-tick.C:
   293  				pushProgress()
   294  			}
   295  		}
   296  	})
   297  
   298  	g.GoCtx(func(ctx context.Context) error {
   299  		defer close(stopProgress)
   300  
   301  		// We insert splits at every index span of the table above. Since the
   302  		// BulkAdder is split aware when constructing SSTs, there is no risk of worst
   303  		// case overlap behavior in the resulting AddSSTable calls.
   304  		//
   305  		// NB: We are getting rid of the pre-buffering stage which constructed
   306  		// separate buckets for each table's primary data, and flushed to the
   307  		// BulkAdder when the bucket was full. This is because a tpcc 1k IMPORT would
   308  		// OOM when maintaining this buffer. Two big wins we got from this
   309  		// pre-buffering stage were:
   310  		//
   311  		// 1. We avoided worst case overlapping behavior in the AddSSTable calls as a
   312  		// result of flushing keys with the same TableIDIndexID prefix together.
   313  		//
   314  		// 2. Secondary index KVs which were few and filled the bucket infrequently
   315  		// were flushed rarely, resulting in fewer L0 (and total) files.
   316  		//
   317  		// While we continue to achieve the first property as a result of the splits
   318  		// mentioned above, the KVs sent to the BulkAdder are no longer grouped which
   319  		// results in flushing a much larger number of small SSTs. This increases the
   320  		// number of L0 (and total) files, but with a lower memory usage.
   321  		for kvBatch := range kvCh {
   322  			for _, kv := range kvBatch.KVs {
   323  				_, _, indexID, indexErr := keys.TODOSQLCodec.DecodeIndexPrefix(kv.Key)
   324  				if indexErr != nil {
   325  					return indexErr
   326  				}
   327  
   328  				// Decide which adder to send the KV to by extracting its index id.
   329  				//
   330  				// TODO(adityamaru): There is a potential optimization of plumbing the
   331  				// different putters, and differentiating based on their type. It might be
   332  				// more efficient than parsing every kv.
   333  				if indexID == 1 {
   334  					if err := pkIndexAdder.Add(ctx, kv.Key, kv.Value.RawBytes); err != nil {
   335  						if errors.HasType(err, (*kvserverbase.DuplicateKeyError)(nil)) {
   336  							return errors.Wrap(err, "duplicate key in primary index")
   337  						}
   338  						return err
   339  					}
   340  				} else {
   341  					if err := indexAdder.Add(ctx, kv.Key, kv.Value.RawBytes); err != nil {
   342  						if errors.HasType(err, (*kvserverbase.DuplicateKeyError)(nil)) {
   343  							return errors.Wrap(err, "duplicate key in index")
   344  						}
   345  						return err
   346  					}
   347  				}
   348  			}
   349  			offset := offsets[kvBatch.Source]
   350  			writtenRow[offset] = kvBatch.LastRow
   351  			atomic.StoreUint32(&writtenFraction[offset], math.Float32bits(kvBatch.Progress))
   352  			if flowCtx.Cfg.TestingKnobs.BulkAdderFlushesEveryBatch {
   353  				_ = pkIndexAdder.Flush(ctx)
   354  				_ = indexAdder.Flush(ctx)
   355  				pushProgress()
   356  			}
   357  		}
   358  		return nil
   359  	})
   360  
   361  	if err := g.Wait(); err != nil {
   362  		return nil, err
   363  	}
   364  
   365  	if err := pkIndexAdder.Flush(ctx); err != nil {
   366  		if errors.HasType(err, (*kvserverbase.DuplicateKeyError)(nil)) {
   367  			return nil, errors.Wrap(err, "duplicate key in primary index")
   368  		}
   369  		return nil, err
   370  	}
   371  
   372  	if err := indexAdder.Flush(ctx); err != nil {
   373  		if errors.HasType(err, (*kvserverbase.DuplicateKeyError)(nil)) {
   374  			return nil, errors.Wrap(err, "duplicate key in index")
   375  		}
   376  		return nil, err
   377  	}
   378  
   379  	addedSummary := pkIndexAdder.GetSummary()
   380  	addedSummary.Add(indexAdder.GetSummary())
   381  	return &addedSummary, nil
   382  }
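
// Progress bookkeeping in ingestKvs uses plain int64/uint32 slots updated with
// the atomic package: row counts are stored directly, while the completed
// fraction is packed into a uint32 via math.Float32bits. The helpers below are
// a hypothetical restatement of that bookkeeping and are not used by ingestKvs
// itself: the resume position is the last row for which both adders have
// flushed, i.e. the minimum of the two counters, and the fraction is unpacked
// with math.Float32frombits.
func resumePosition(pkFlushedRow, idxFlushedRow *int64) int64 {
	pk := atomic.LoadInt64(pkFlushedRow)
	idx := atomic.LoadInt64(idxFlushedRow)
	if idx > pk {
		return pk
	}
	return idx
}

func loadCompletedFraction(packed *uint32) float32 {
	return math.Float32frombits(atomic.LoadUint32(packed))
}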
   383  
   384  func init() {
   385  	rowexec.NewReadImportDataProcessor = newReadImportDataProcessor
   386  }
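
// ingestKvs above routes each KV by the index ID encoded in its key prefix:
// index ID 1 is the table's primary index, anything else is a secondary index.
// The predicate below is an illustrative, hypothetical restatement of that
// routing decision using the same decoding call; it is not used by this
// package.
func keyIsPrimaryIndex(key roachpb.Key) (bool, error) {
	_, _, indexID, err := keys.TODOSQLCodec.DecodeIndexPrefix(key)
	if err != nil {
		return false, err
	}
	return indexID == 1, nil
}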