github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/distsql_plan_csv.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package sql

import (
	"context"
	"math"
	"math/rand"
	"sync/atomic"
	"time"

	"github.com/cockroachdb/cockroach/pkg/jobs"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/physicalplan"
	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/logtags"
)

// RowResultWriter is a thin wrapper around a RowContainer.
type RowResultWriter struct {
	rowContainer *rowcontainer.RowContainer
	rowsAffected int
	err          error
}

var _ rowResultWriter = &RowResultWriter{}

// NewRowResultWriter creates a new RowResultWriter.
func NewRowResultWriter(rowContainer *rowcontainer.RowContainer) *RowResultWriter {
	return &RowResultWriter{rowContainer: rowContainer}
}

// IncrementRowsAffected implements the rowResultWriter interface.
func (b *RowResultWriter) IncrementRowsAffected(n int) {
	b.rowsAffected += n
}

// AddRow implements the rowResultWriter interface.
func (b *RowResultWriter) AddRow(ctx context.Context, row tree.Datums) error {
	_, err := b.rowContainer.AddRow(ctx, row)
	return err
}

// SetError is part of the rowResultWriter interface.
func (b *RowResultWriter) SetError(err error) {
	b.err = err
}

// Err is part of the rowResultWriter interface.
func (b *RowResultWriter) Err() error {
	return b.err
}
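// The rowResultWriter interface is satisfied by exactly the four methods
// above. As a minimal illustrative sketch (this type is hypothetical and not
// part of the original file), a writer that discards rows while still
// tracking errors and the affected-row count could look like this:
type discardResultWriter struct {
	rowsAffected int
	err          error
}

var _ rowResultWriter = &discardResultWriter{}

func (d *discardResultWriter) IncrementRowsAffected(n int) { d.rowsAffected += n }

func (d *discardResultWriter) AddRow(context.Context, tree.Datums) error { return nil }

func (d *discardResultWriter) SetError(err error) { d.err = err }

func (d *discardResultWriter) Err() error { return d.err }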
// callbackResultWriter is a rowResultWriter that runs a callback function
// on AddRow.
type callbackResultWriter struct {
	fn           func(ctx context.Context, row tree.Datums) error
	rowsAffected int
	err          error
}

var _ rowResultWriter = &callbackResultWriter{}

// newCallbackResultWriter creates a new callbackResultWriter.
func newCallbackResultWriter(
	fn func(ctx context.Context, row tree.Datums) error,
) *callbackResultWriter {
	return &callbackResultWriter{fn: fn}
}

func (c *callbackResultWriter) IncrementRowsAffected(n int) {
	c.rowsAffected += n
}

func (c *callbackResultWriter) AddRow(ctx context.Context, row tree.Datums) error {
	return c.fn(ctx, row)
}

func (c *callbackResultWriter) SetError(err error) {
	c.err = err
}

func (c *callbackResultWriter) Err() error {
	return c.err
}

func (dsp *DistSQLPlanner) setupAllNodesPlanning(
	ctx context.Context, evalCtx *extendedEvalContext, execCfg *ExecutorConfig,
) (*PlanningCtx, []roachpb.NodeID, error) {
	planCtx := dsp.NewPlanningCtx(ctx, evalCtx, nil /* txn */, true /* distribute */)

	ss, err := execCfg.StatusServer.OptionalErr(47900)
	if err != nil {
		return nil, nil, err
	}
	resp, err := ss.Nodes(ctx, &serverpb.NodesRequest{})
	if err != nil {
		return nil, nil, err
	}
	// Because we're not going through the normal pathways, we have to set up
	// the planCtx.NodeStatuses map ourselves; CheckNodeHealthAndVersion()
	// populates it.
	for _, node := range resp.Nodes {
		_ /* NodeStatus */ = dsp.CheckNodeHealthAndVersion(planCtx, node.Desc.NodeID)
	}
	nodes := make([]roachpb.NodeID, 0, len(planCtx.NodeStatuses))
	for nodeID := range planCtx.NodeStatuses {
		nodes = append(nodes, nodeID)
	}
	// Shuffle the node order so that multiple IMPORTs run in parallel will not
	// schedule their CSV reading identically. For example, if there are 3
	// nodes and 4 files, the first node will get 2 files while the other nodes
	// will each get 1 file. Shuffling makes that first node random instead of
	// always the same.
	rand.Shuffle(len(nodes), func(i, j int) {
		nodes[i], nodes[j] = nodes[j], nodes[i]
	})
	return planCtx, nodes, nil
}

func makeImportReaderSpecs(
	job *jobs.Job,
	tables map[string]*execinfrapb.ReadImportDataSpec_ImportTable,
	from []string,
	format roachpb.IOFileFormat,
	nodes []roachpb.NodeID,
	walltime int64,
) []*execinfrapb.ReadImportDataSpec {

	// For each input file, assign it to a node.
	inputSpecs := make([]*execinfrapb.ReadImportDataSpec, 0, len(nodes))
	progress := job.Progress()
	importProgress := progress.GetImport()
	for i, input := range from {
		// Round-robin assign CSV files to nodes. Each of files 0 through
		// len(nodes)-1 creates a new spec; later files just add themselves to
		// an existing spec's Uri map.
		if i < len(nodes) {
			spec := &execinfrapb.ReadImportDataSpec{
				Tables: tables,
				Format: format,
				Progress: execinfrapb.JobProgress{
					JobID: *job.ID(),
					Slot:  int32(i),
				},
				WalltimeNanos: walltime,
				Uri:           make(map[int32]string),
				ResumePos:     make(map[int32]int64),
			}
			inputSpecs = append(inputSpecs, spec)
		}
		n := i % len(nodes)
		inputSpecs[n].Uri[int32(i)] = input
		if importProgress.ResumePos != nil {
			inputSpecs[n].ResumePos[int32(i)] = importProgress.ResumePos[int32(i)]
		}
	}

	for i := range inputSpecs {
		// TODO(mjibson): using the actual file sizes here would improve
		// progress accuracy.
		inputSpecs[i].Progress.Contribution = float32(len(inputSpecs[i].Uri)) / float32(len(from))
	}
	return inputSpecs
}
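// To make the assignment above concrete: file i always lands on spec
// i % len(nodes). A minimal standalone sketch of that round-robin mapping
// (this helper is hypothetical and not used by IMPORT): with 4 files and 3
// nodes, node 0 gets files 0 and 3, matching the example in the
// setupAllNodesPlanning comment above.
func roundRobinAssign(files []string, numNodes int) map[int][]string {
	byNode := make(map[int][]string, numNodes)
	for i, f := range files {
		byNode[i%numNodes] = append(byNode[i%numNodes], f)
	}
	return byNode
}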
func presplitTableBoundaries(
	ctx context.Context,
	cfg *ExecutorConfig,
	tables map[string]*execinfrapb.ReadImportDataSpec_ImportTable,
) error {
	expirationTime := cfg.DB.Clock().Now().Add(time.Hour.Nanoseconds(), 0)
	for _, tbl := range tables {
		for _, span := range tbl.Desc.AllIndexSpans(cfg.Codec) {
			if err := cfg.DB.AdminSplit(ctx, span.Key, span.Key, expirationTime); err != nil {
				return err
			}

			log.VEventf(ctx, 1, "scattering index range %s", span.Key)
			scatterReq := &roachpb.AdminScatterRequest{
				RequestHeader: roachpb.RequestHeaderFromSpan(span),
			}
			if _, pErr := kv.SendWrapped(ctx, cfg.DB.NonTransactionalSender(), scatterReq); pErr != nil {
				log.Errorf(ctx, "failed to scatter span %s: %s", span.Key, pErr)
			}
		}
	}
	return nil
}
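// DistIngest below shares per-file progress between the flow goroutine and a
// periodic updater without locks by storing each float32 fraction as its
// IEEE-754 bit pattern in a uint32 that is read and written atomically. A
// minimal sketch of that pattern (these two helpers are illustrative and
// hypothetical, not used elsewhere in the package):
func storeFraction(slot *uint32, f float32) {
	// Pack the float32 into its bit representation for atomic storage.
	atomic.StoreUint32(slot, math.Float32bits(f))
}

func loadFraction(slot *uint32) float32 {
	// Atomically load the bits and unpack them back into a float32.
	return math.Float32frombits(atomic.LoadUint32(slot))
}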
// DistIngest is used by IMPORT to run a DistSQL flow to ingest data by starting
// reader processes on many nodes that each read and ingest their assigned files
// and then send back a summary of what they ingested. The combined summary is
// returned.
func DistIngest(
	ctx context.Context,
	phs PlanHookState,
	job *jobs.Job,
	tables map[string]*execinfrapb.ReadImportDataSpec_ImportTable,
	from []string,
	format roachpb.IOFileFormat,
	walltime int64,
	alwaysFlushProgress bool,
) (roachpb.BulkOpSummary, error) {
	ctx = logtags.AddTag(ctx, "import-distsql-ingest", nil)

	dsp := phs.DistSQLPlanner()
	evalCtx := phs.ExtendedEvalContext()

	planCtx, nodes, err := dsp.setupAllNodesPlanning(ctx, evalCtx, phs.ExecCfg())
	if err != nil {
		return roachpb.BulkOpSummary{}, err
	}

	inputSpecs := makeImportReaderSpecs(job, tables, from, format, nodes, walltime)

	var p PhysicalPlan

	// Set up a one-stage plan with one proc per input spec.
	stageID := p.NewStageID()
	p.ResultRouters = make([]physicalplan.ProcessorIdx, len(inputSpecs))
	for i, rcs := range inputSpecs {
		proc := physicalplan.Processor{
			Node: nodes[i],
			Spec: execinfrapb.ProcessorSpec{
				Core:    execinfrapb.ProcessorCoreUnion{ReadImport: rcs},
				Output:  []execinfrapb.OutputRouterSpec{{Type: execinfrapb.OutputRouterSpec_PASS_THROUGH}},
				StageID: stageID,
			},
		}
		pIdx := p.AddProcessor(proc)
		p.ResultRouters[i] = pIdx
	}

	// The direct-ingest readers will emit a binary-encoded BulkOpSummary.
	p.PlanToStreamColMap = []int{0, 1}
	p.ResultTypes = []*types.T{types.Bytes, types.Bytes}

	dsp.FinalizePlan(planCtx, &p)

	if err := job.FractionProgressed(ctx,
		func(ctx context.Context, details jobspb.ProgressDetails) float32 {
			prog := details.(*jobspb.Progress_Import).Import
			prog.ReadProgress = make([]float32, len(from))
			prog.ResumePos = make([]int64, len(from))
			return 0.0
		},
	); err != nil {
		return roachpb.BulkOpSummary{}, err
	}

	rowProgress := make([]int64, len(from))
	fractionProgress := make([]uint32, len(from))

	updateJobProgress := func() error {
		return job.FractionProgressed(ctx,
			func(ctx context.Context, details jobspb.ProgressDetails) float32 {
				var overall float32
				prog := details.(*jobspb.Progress_Import).Import
				for i := range rowProgress {
					prog.ResumePos[i] = atomic.LoadInt64(&rowProgress[i])
				}
				for i := range fractionProgress {
					fileProgress := math.Float32frombits(atomic.LoadUint32(&fractionProgress[i]))
					prog.ReadProgress[i] = fileProgress
					overall += fileProgress
				}
				return overall / float32(len(from))
			},
		)
	}

	metaFn := func(_ context.Context, meta *execinfrapb.ProducerMetadata) error {
		if meta.BulkProcessorProgress != nil {
			for i, v := range meta.BulkProcessorProgress.ResumePos {
				atomic.StoreInt64(&rowProgress[i], v)
			}
			for i, v := range meta.BulkProcessorProgress.CompletedFraction {
				atomic.StoreUint32(&fractionProgress[i], math.Float32bits(v))
			}

			if alwaysFlushProgress {
				return updateJobProgress()
			}
		}
		return nil
	}

	var res roachpb.BulkOpSummary
	rowResultWriter := newCallbackResultWriter(func(ctx context.Context, row tree.Datums) error {
		var counts roachpb.BulkOpSummary
		if err := protoutil.Unmarshal([]byte(*row[0].(*tree.DBytes)), &counts); err != nil {
			return err
		}
		res.Add(counts)
		return nil
	})

	if err := presplitTableBoundaries(ctx, phs.ExecCfg(), tables); err != nil {
		return roachpb.BulkOpSummary{}, err
	}

	recv := MakeDistSQLReceiver(
		ctx,
		&metadataCallbackWriter{rowResultWriter: rowResultWriter, fn: metaFn},
		tree.Rows,
		nil, /* rangeCache */
		nil, /* leaseCache */
		nil, /* txn - the flow does not read or write the database */
		func(ts hlc.Timestamp) {},
		evalCtx.Tracing,
	)
	defer recv.Release()

	stopProgress := make(chan struct{})
	g := ctxgroup.WithContext(ctx)
	g.GoCtx(func(ctx context.Context) error {
		tick := time.NewTicker(time.Second * 10)
		defer tick.Stop()
		done := ctx.Done()
		for {
			select {
			case <-stopProgress:
				return nil
			case <-done:
				return ctx.Err()
			case <-tick.C:
				if err := updateJobProgress(); err != nil {
					return err
				}
			}
		}
	})

	g.GoCtx(func(ctx context.Context) error {
		defer close(stopProgress)
		// Copy the evalCtx, as dsp.Run() might change it.
		evalCtxCopy := *evalCtx
		dsp.Run(planCtx, nil, &p, recv, &evalCtxCopy, nil /* finishedSetupFn */)()
		return rowResultWriter.Err()
	})

	if err := g.Wait(); err != nil {
		return roachpb.BulkOpSummary{}, err
	}

	return res, nil
}
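// The progress loop in DistIngest above follows a common Go pattern: one
// goroutine ticks on a timer until a stop channel closes, while a second
// goroutine does the real work and closes that channel on exit. A minimal,
// self-contained sketch of the pattern (this helper and its names are
// hypothetical, extracted here only for illustration):
func runWithPeriodicUpdates(
	ctx context.Context, interval time.Duration, update func() error, work func(context.Context) error,
) error {
	stop := make(chan struct{})
	g := ctxgroup.WithContext(ctx)
	g.GoCtx(func(ctx context.Context) error {
		// Periodically flush progress until the worker signals completion.
		tick := time.NewTicker(interval)
		defer tick.Stop()
		for {
			select {
			case <-stop:
				return nil
			case <-ctx.Done():
				return ctx.Err()
			case <-tick.C:
				if err := update(); err != nil {
					return err
				}
			}
		}
	})
	g.GoCtx(func(ctx context.Context) error {
		// Closing the channel, even on error, stops the updater goroutine.
		defer close(stop)
		return work(ctx)
	})
	return g.Wait()
}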