github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/distsql_plan_csv.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package sql

import (
	"context"
	"math"
	"math/rand"
	"sync/atomic"
	"time"

	"github.com/cockroachdb/cockroach/pkg/jobs"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/physicalplan"
	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/logtags"
)

// RowResultWriter is a thin wrapper around a RowContainer.
type RowResultWriter struct {
	rowContainer *rowcontainer.RowContainer
	rowsAffected int
	err          error
}

var _ rowResultWriter = &RowResultWriter{}

// NewRowResultWriter creates a new RowResultWriter.
func NewRowResultWriter(rowContainer *rowcontainer.RowContainer) *RowResultWriter {
	return &RowResultWriter{rowContainer: rowContainer}
}
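
// A minimal usage sketch (illustrative only, not part of this file's API;
// assumes the container's Len/At accessors and a container constructed
// elsewhere):
//
//	rw := NewRowResultWriter(container)
//	if err := rw.AddRow(ctx, row); err != nil {
//		// handle the write error
//	}
//	for i := 0; i < container.Len(); i++ {
//		_ = container.At(i) // rows written through rw are visible here
//	}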

// IncrementRowsAffected implements the rowResultWriter interface.
func (b *RowResultWriter) IncrementRowsAffected(n int) {
	b.rowsAffected += n
}

// AddRow implements the rowResultWriter interface.
func (b *RowResultWriter) AddRow(ctx context.Context, row tree.Datums) error {
	_, err := b.rowContainer.AddRow(ctx, row)
	return err
}

// SetError is part of the rowResultWriter interface.
func (b *RowResultWriter) SetError(err error) {
	b.err = err
}

// Err is part of the rowResultWriter interface.
func (b *RowResultWriter) Err() error {
	return b.err
}

// callbackResultWriter is a rowResultWriter that runs a callback function
// on AddRow.
type callbackResultWriter struct {
	fn           func(ctx context.Context, row tree.Datums) error
	rowsAffected int
	err          error
}

var _ rowResultWriter = &callbackResultWriter{}

// newCallbackResultWriter creates a new callbackResultWriter.
func newCallbackResultWriter(
	fn func(ctx context.Context, row tree.Datums) error,
) *callbackResultWriter {
	return &callbackResultWriter{fn: fn}
}
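
// A minimal usage sketch (illustrative only): the callback lets a caller fold
// results into a local variable without materializing rows.
//
//	var total int64
//	w := newCallbackResultWriter(func(ctx context.Context, row tree.Datums) error {
//		total += int64(tree.MustBeDInt(row[0]))
//		return nil
//	})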

func (c *callbackResultWriter) IncrementRowsAffected(n int) {
	c.rowsAffected += n
}

func (c *callbackResultWriter) AddRow(ctx context.Context, row tree.Datums) error {
	return c.fn(ctx, row)
}

func (c *callbackResultWriter) SetError(err error) {
	c.err = err
}

func (c *callbackResultWriter) Err() error {
	return c.err
}

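// setupAllNodesPlanning creates a PlanningCtx and returns the IDs of every
// node the planner considers usable, shuffled so that concurrent IMPORTs do
// not all assign their first files to the same node.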
func (dsp *DistSQLPlanner) setupAllNodesPlanning(
	ctx context.Context, evalCtx *extendedEvalContext, execCfg *ExecutorConfig,
) (*PlanningCtx, []roachpb.NodeID, error) {
	planCtx := dsp.NewPlanningCtx(ctx, evalCtx, nil /* txn */, true /* distribute */)

	ss, err := execCfg.StatusServer.OptionalErr(47900)
	if err != nil {
		return nil, nil, err
	}
	resp, err := ss.Nodes(ctx, &serverpb.NodesRequest{})
	if err != nil {
		return nil, nil, err
	}
	// Because we're not going through the normal pathways, we have to set up
	// the planCtx.NodeStatuses map ourselves. CheckNodeHealthAndVersion() will
	// populate it.
	for _, node := range resp.Nodes {
		_ /* NodeStatus */ = dsp.CheckNodeHealthAndVersion(planCtx, node.Desc.NodeID)
	}
	nodes := make([]roachpb.NodeID, 0, len(planCtx.NodeStatuses))
	for nodeID := range planCtx.NodeStatuses {
		nodes = append(nodes, nodeID)
	}
	// Shuffle the node order so that multiple IMPORTs run in parallel do not
	// schedule their CSV reading identically. For example, with 3 nodes and 4
	// files, the first node gets 2 files while the other two each get 1;
	// shuffling makes that first node random instead of always the same one.
	rand.Shuffle(len(nodes), func(i, j int) {
		nodes[i], nodes[j] = nodes[j], nodes[i]
	})
	return planCtx, nodes, nil
}

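// makeImportReaderSpecs builds one ReadImportDataSpec per node that receives
// at least one input file, assigning files to nodes in round-robin order. For
// example, with 3 nodes and 7 files, spec 0 reads files {0, 3, 6}, spec 1
// reads {1, 4}, and spec 2 reads {2, 5}; each spec's progress contribution is
// its share of the total file count.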
func makeImportReaderSpecs(
	job *jobs.Job,
	tables map[string]*execinfrapb.ReadImportDataSpec_ImportTable,
	from []string,
	format roachpb.IOFileFormat,
	nodes []roachpb.NodeID,
	walltime int64,
) []*execinfrapb.ReadImportDataSpec {

	// For each input file, assign it to a node.
	inputSpecs := make([]*execinfrapb.ReadImportDataSpec, 0, len(nodes))
	progress := job.Progress()
	importProgress := progress.GetImport()
	for i, input := range from {
		// Round-robin assign CSV files to nodes. Files 0 through len(nodes)-1
		// create the specs; later files just add themselves to the Uris of an
		// existing spec.
		if i < len(nodes) {
			spec := &execinfrapb.ReadImportDataSpec{
				Tables: tables,
				Format: format,
				Progress: execinfrapb.JobProgress{
					JobID: *job.ID(),
					Slot:  int32(i),
				},
				WalltimeNanos: walltime,
				Uri:           make(map[int32]string),
				ResumePos:     make(map[int32]int64),
			}
			inputSpecs = append(inputSpecs, spec)
		}
		n := i % len(nodes)
		inputSpecs[n].Uri[int32(i)] = input
		if importProgress.ResumePos != nil {
			inputSpecs[n].ResumePos[int32(i)] = importProgress.ResumePos[int32(i)]
		}
	}

	for i := range inputSpecs {
		// TODO(mjibson): using the actual file sizes here would improve progress
		// accuracy.
		inputSpecs[i].Progress.Contribution = float32(len(inputSpecs[i].Uri)) / float32(len(from))
	}
	return inputSpecs
}

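// presplitTableBoundaries splits each target table's index spans into their
// own ranges (the split points carry a one-hour expiration) and scatters them
// across the cluster, so that the subsequent ingestion is spread over many
// nodes instead of hammering the single range a new table would occupy.
// Scatter failures are only logged; the import proceeds without them.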
func presplitTableBoundaries(
	ctx context.Context,
	cfg *ExecutorConfig,
	tables map[string]*execinfrapb.ReadImportDataSpec_ImportTable,
) error {
	expirationTime := cfg.DB.Clock().Now().Add(time.Hour.Nanoseconds(), 0)
	for _, tbl := range tables {
		for _, span := range tbl.Desc.AllIndexSpans(cfg.Codec) {
			if err := cfg.DB.AdminSplit(ctx, span.Key, span.Key, expirationTime); err != nil {
				return err
			}

			log.VEventf(ctx, 1, "scattering index range %s", span.Key)
			scatterReq := &roachpb.AdminScatterRequest{
				RequestHeader: roachpb.RequestHeaderFromSpan(span),
			}
			if _, pErr := kv.SendWrapped(ctx, cfg.DB.NonTransactionalSender(), scatterReq); pErr != nil {
				log.Errorf(ctx, "failed to scatter span %s: %s", span.Key, pErr)
			}
		}
	}
	return nil
}

// DistIngest is used by IMPORT to run a DistSQL flow to ingest data by starting
// reader processes on many nodes that each read and ingest their assigned files
// and then send back a summary of what they ingested. The combined summary is
// returned.
func DistIngest(
	ctx context.Context,
	phs PlanHookState,
	job *jobs.Job,
	tables map[string]*execinfrapb.ReadImportDataSpec_ImportTable,
	from []string,
	format roachpb.IOFileFormat,
	walltime int64,
	alwaysFlushProgress bool,
) (roachpb.BulkOpSummary, error) {
	ctx = logtags.AddTag(ctx, "import-distsql-ingest", nil)

	dsp := phs.DistSQLPlanner()
	evalCtx := phs.ExtendedEvalContext()

	planCtx, nodes, err := dsp.setupAllNodesPlanning(ctx, evalCtx, phs.ExecCfg())
	if err != nil {
		return roachpb.BulkOpSummary{}, err
	}

	inputSpecs := makeImportReaderSpecs(job, tables, from, format, nodes, walltime)

	var p PhysicalPlan

	// Set up a one-stage plan with one proc per input spec.
	stageID := p.NewStageID()
	p.ResultRouters = make([]physicalplan.ProcessorIdx, len(inputSpecs))
	for i, rcs := range inputSpecs {
		proc := physicalplan.Processor{
			Node: nodes[i],
			Spec: execinfrapb.ProcessorSpec{
				Core:    execinfrapb.ProcessorCoreUnion{ReadImport: rcs},
				Output:  []execinfrapb.OutputRouterSpec{{Type: execinfrapb.OutputRouterSpec_PASS_THROUGH}},
				StageID: stageID,
			},
		}
		pIdx := p.AddProcessor(proc)
		p.ResultRouters[i] = pIdx
	}

	// The direct-ingest readers will emit a binary-encoded BulkOpSummary.
	p.PlanToStreamColMap = []int{0, 1}
	p.ResultTypes = []*types.T{types.Bytes, types.Bytes}

	dsp.FinalizePlan(planCtx, &p)

	if err := job.FractionProgressed(ctx,
		func(ctx context.Context, details jobspb.ProgressDetails) float32 {
			prog := details.(*jobspb.Progress_Import).Import
			prog.ReadProgress = make([]float32, len(from))
			prog.ResumePos = make([]int64, len(from))
			return 0.0
		},
	); err != nil {
		return roachpb.BulkOpSummary{}, err
	}

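	// Per-file progress is communicated through these arrays: metaFn below
	// writes them and updateJobProgress reads them. The fractions are float32
	// values stored as their IEEE-754 bit patterns (math.Float32bits /
	// math.Float32frombits) so they can be updated with plain atomics.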
	rowProgress := make([]int64, len(from))
	fractionProgress := make([]uint32, len(from))

	updateJobProgress := func() error {
		return job.FractionProgressed(ctx,
			func(ctx context.Context, details jobspb.ProgressDetails) float32 {
				var overall float32
				prog := details.(*jobspb.Progress_Import).Import
				for i := range rowProgress {
					prog.ResumePos[i] = atomic.LoadInt64(&rowProgress[i])
				}
				for i := range fractionProgress {
					fileProgress := math.Float32frombits(atomic.LoadUint32(&fractionProgress[i]))
					prog.ReadProgress[i] = fileProgress
					overall += fileProgress
				}
				return overall / float32(len(from))
			},
		)
	}

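	// metaFn runs for each metadata record pushed back by the remote readers:
	// it records their per-file resume positions and completed fractions and,
	// when alwaysFlushProgress is set, persists progress immediately rather
	// than waiting for the periodic ticker below.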
	metaFn := func(_ context.Context, meta *execinfrapb.ProducerMetadata) error {
		if meta.BulkProcessorProgress != nil {
			for i, v := range meta.BulkProcessorProgress.ResumePos {
				atomic.StoreInt64(&rowProgress[i], v)
			}
			for i, v := range meta.BulkProcessorProgress.CompletedFraction {
				atomic.StoreUint32(&fractionProgress[i], math.Float32bits(v))
			}

			if alwaysFlushProgress {
				return updateJobProgress()
			}
		}
		return nil
	}

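	// Each reader reports its ingestion counts as a row whose first column is
	// a marshaled BulkOpSummary; fold those into one combined summary.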
	var res roachpb.BulkOpSummary
	rowResultWriter := newCallbackResultWriter(func(ctx context.Context, row tree.Datums) error {
		var counts roachpb.BulkOpSummary
		if err := protoutil.Unmarshal([]byte(*row[0].(*tree.DBytes)), &counts); err != nil {
			return err
		}
		res.Add(counts)
		return nil
	})

	if err := presplitTableBoundaries(ctx, phs.ExecCfg(), tables); err != nil {
		return roachpb.BulkOpSummary{}, err
	}

	recv := MakeDistSQLReceiver(
		ctx,
		&metadataCallbackWriter{rowResultWriter: rowResultWriter, fn: metaFn},
		tree.Rows,
		nil, /* rangeCache */
		nil, /* leaseCache */
		nil, /* txn - the flow does not read or write the database */
		func(ts hlc.Timestamp) {},
		evalCtx.Tracing,
	)
	defer recv.Release()

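	// Flush accumulated progress to the job record every ten seconds until the
	// flow finishes (stopProgress is closed by the goroutine running the flow).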
	stopProgress := make(chan struct{})
	g := ctxgroup.WithContext(ctx)
	g.GoCtx(func(ctx context.Context) error {
		tick := time.NewTicker(time.Second * 10)
		defer tick.Stop()
		done := ctx.Done()
		for {
			select {
			case <-stopProgress:
				return nil
			case <-done:
				return ctx.Err()
			case <-tick.C:
				if err := updateJobProgress(); err != nil {
					return err
				}
			}
		}
	})

	g.GoCtx(func(ctx context.Context) error {
		defer close(stopProgress)
		// Copy the evalCtx, as dsp.Run() might change it.
		evalCtxCopy := *evalCtx
		dsp.Run(planCtx, nil, &p, recv, &evalCtxCopy, nil /* finishedSetupFn */)()
		return rowResultWriter.Err()
	})

	if err := g.Wait(); err != nil {
		return roachpb.BulkOpSummary{}, err
	}

	return res, nil
}