github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/changefeed_dist.go

// Copyright 2018 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package changefeedccl

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/physicalplan"
	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

func init() {
	rowexec.NewChangeAggregatorProcessor = newChangeAggregatorProcessor
	rowexec.NewChangeFrontierProcessor = newChangeFrontierProcessor
}

const (
	changeAggregatorProcName = `changeagg`
	changeFrontierProcName   = `changefntr`
)

var changefeedResultTypes = []*types.T{
	types.Bytes,  // resolved span
	types.String, // topic
	types.Bytes,  // key
	types.Bytes,  // value
}

// distChangefeedFlow plans and runs a distributed changefeed.
//
// One or more ChangeAggregator processors watch table data for changes. These
// transform the changed kvs into changed rows and either emit them to a sink
// (such as kafka) or, if there is no sink, forward them in columns 1,2,3 (where
// they will be eventually returned directly via pgwire). In either case,
// periodically a span will become resolved as of some timestamp, meaning that
// no new rows will ever be emitted at or below that timestamp. These span-level
// resolved timestamps are emitted as a marshaled `jobspb.ResolvedSpan` proto in
// column 0.
//
// The flow will always have exactly one ChangeFrontier processor which all the
// ChangeAggregators feed into. It collects all span-level resolved timestamps
// and aggregates them into a changefeed-level resolved timestamp, which is the
// minimum of the span-level resolved timestamps. This changefeed-level resolved
// timestamp is emitted into the changefeed sink (or returned to the gateway if
// there is no sink) whenever it advances. ChangeFrontier also updates the
// progress of the changefeed's corresponding system job.
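//
// As an illustrative example (not exhaustive): `CREATE CHANGEFEED FOR foo INTO
// 'kafka://...'` is planned as one ChangeAggregator per span partition, placed
// near the leaseholders of foo's ranges, all feeding a single ChangeFrontier on
// the gateway; a sinkless `EXPERIMENTAL CHANGEFEED FOR foo` instead runs a
// single ChangeAggregator on the gateway and streams its rows back over pgwire.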
func distChangefeedFlow(
	ctx context.Context,
	phs sql.PlanHookState,
	jobID int64,
	details jobspb.ChangefeedDetails,
	progress jobspb.Progress,
	resultsCh chan<- tree.Datums,
) error {
	var err error
	details, err = validateDetails(details)
	if err != nil {
		return err
	}

	// NB: A non-empty high water indicates that we have checkpointed a resolved
	// timestamp. Skipping the initial scan is equivalent to starting the
	// changefeed from a checkpoint at its start time. Initialize the progress
	// based on whether we should perform an initial scan.
	{
		h := progress.GetHighWater()
		noHighWater := (h == nil || *h == (hlc.Timestamp{}))
		// We want to set the high-water mark (and thus avoid an initial scan) if
		// either a cursor was specified and no initial scan was requested, or
		// there is no cursor but the initial scan was explicitly disabled.
		if noHighWater && !initialScanFromOptions(details.Opts) {
			// If there is a cursor, the statement time has already been set to it.
			progress.Progress = &jobspb.Progress_HighWater{HighWater: &details.StatementTime}
		}
	}

	spansTS := details.StatementTime
	var initialHighWater hlc.Timestamp
	if h := progress.GetHighWater(); h != nil && *h != (hlc.Timestamp{}) {
		initialHighWater = *h
		// If we have a high-water set, use it to compute the spans, since the
		// ones at the statement time may have been garbage collected by now.
		spansTS = initialHighWater
	}

	execCfg := phs.ExecCfg()
	trackedSpans, err := fetchSpansForTargets(ctx, execCfg.DB, execCfg.Codec, details.Targets, spansTS)
	if err != nil {
		return err
	}

	// Changefeed flows handle transactional consistency themselves.
	var noTxn *kv.Txn
	gatewayNodeID, err := execCfg.NodeID.OptionalNodeIDErr(48274)
	if err != nil {
		return err
	}
	dsp := phs.DistSQLPlanner()
	evalCtx := phs.ExtendedEvalContext()
	planCtx := dsp.NewPlanningCtx(ctx, evalCtx, noTxn, true /* distribute */)

	var spanPartitions []sql.SpanPartition
	if details.SinkURI == `` {
		// Sinkless feeds get one ChangeAggregator on the gateway.
		spanPartitions = []sql.SpanPartition{{Node: gatewayNodeID, Spans: trackedSpans}}
	} else {
		// All other feeds get a ChangeAggregator local to the leaseholder.
		spanPartitions, err = dsp.PartitionSpans(planCtx, trackedSpans)
		if err != nil {
			return err
		}
	}

	changeAggregatorProcs := make([]physicalplan.Processor, 0, len(spanPartitions))
	for _, sp := range spanPartitions {
		// TODO(dan): Merge these watches with the span-level resolved
		// timestamps from the job progress.
		watches := make([]execinfrapb.ChangeAggregatorSpec_Watch, len(sp.Spans))
		for i, nodeSpan := range sp.Spans {
			watches[i] = execinfrapb.ChangeAggregatorSpec_Watch{
				Span:            nodeSpan,
				InitialResolved: initialHighWater,
			}
		}

		changeAggregatorProcs = append(changeAggregatorProcs, physicalplan.Processor{
			Node: sp.Node,
			Spec: execinfrapb.ProcessorSpec{
				Core: execinfrapb.ProcessorCoreUnion{
					ChangeAggregator: &execinfrapb.ChangeAggregatorSpec{
						Watches: watches,
						Feed:    details,
					},
				},
				Output: []execinfrapb.OutputRouterSpec{{Type: execinfrapb.OutputRouterSpec_PASS_THROUGH}},
			},
		})
	}
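	// Illustrative shape of what the loop above produces: if the tracked
	// table's ranges have leaseholders on three nodes, there are three
	// ChangeAggregator specs, each carrying the spans local to its node as
	// Watches, with InitialResolved set to the checkpointed high-water (or the
	// zero timestamp for a brand-new feed, in which case an initial scan runs).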
	// NB: This SpanFrontier processor depends on the set of tracked spans being
	// static. Currently there is no way for them to change after the changefeed
	// is created, even if it is paused and unpaused, but #28982 describes some
	// ways that this might happen in the future.
	changeFrontierSpec := execinfrapb.ChangeFrontierSpec{
		TrackedSpans: trackedSpans,
		Feed:         details,
		JobID:        jobID,
	}

	var p sql.PhysicalPlan

	stageID := p.NewStageID()
	p.ResultRouters = make([]physicalplan.ProcessorIdx, len(changeAggregatorProcs))
	for i, proc := range changeAggregatorProcs {
		proc.Spec.StageID = stageID
		pIdx := p.AddProcessor(proc)
		p.ResultRouters[i] = pIdx
	}

	p.AddSingleGroupStage(
		gatewayNodeID,
		execinfrapb.ProcessorCoreUnion{ChangeFrontier: &changeFrontierSpec},
		execinfrapb.PostProcessSpec{},
		changefeedResultTypes,
	)

	p.ResultTypes = changefeedResultTypes
	p.PlanToStreamColMap = []int{1, 2, 3}
	dsp.FinalizePlan(planCtx, &p)

	resultRows := makeChangefeedResultWriter(resultsCh)
	recv := sql.MakeDistSQLReceiver(
		ctx,
		resultRows,
		tree.Rows,
		execCfg.RangeDescriptorCache,
		execCfg.LeaseHolderCache,
		noTxn,
		func(ts hlc.Timestamp) {},
		evalCtx.Tracing,
	)
	defer recv.Release()

	var finishedSetupFn func()
	if details.SinkURI != `` {
		// We abuse the job's results channel to make CREATE CHANGEFEED wait for
		// this before returning to the user to ensure the setup went okay. Job
		// resumption doesn't have the same hack, but at the moment ignores
		// results and so is currently okay. Return nil instead of anything
		// meaningful so that if we start doing anything with the results
		// returned by resumed jobs, then it breaks instead of returning
		// nonsense.
		finishedSetupFn = func() { resultsCh <- tree.Datums(nil) }
	}

	// Copy the evalCtx, as dsp.Run() might change it.
	evalCtxCopy := *evalCtx
	dsp.Run(planCtx, noTxn, &p, recv, &evalCtxCopy, finishedSetupFn)()
	return resultRows.Err()
}

func fetchSpansForTargets(
	ctx context.Context,
	db *kv.DB,
	codec keys.SQLCodec,
	targets jobspb.ChangefeedTargets,
	ts hlc.Timestamp,
) ([]roachpb.Span, error) {
	var spans []roachpb.Span
	err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		spans = nil
		txn.SetFixedTimestamp(ctx, ts)
		// Note that all targets are currently guaranteed to be tables.
		for tableID := range targets {
			tableDesc, err := sqlbase.GetTableDescFromID(ctx, txn, codec, tableID)
			if err != nil {
				return err
			}
			spans = append(spans, tableDesc.PrimaryIndexSpan(codec))
		}
		return nil
	})
	return spans, err
}

// changefeedResultWriter implements the `rowexec.resultWriter` interface and
// sends the received rows back over the given channel.
type changefeedResultWriter struct {
	rowsCh       chan<- tree.Datums
	rowsAffected int
	err          error
}

func makeChangefeedResultWriter(rowsCh chan<- tree.Datums) *changefeedResultWriter {
	return &changefeedResultWriter{rowsCh: rowsCh}
}

func (w *changefeedResultWriter) AddRow(ctx context.Context, row tree.Datums) error {
	// Copy the row because it's not guaranteed to exist after this function
	// returns.
	row = append(tree.Datums(nil), row...)

	select {
	case <-ctx.Done():
		return ctx.Err()
	case w.rowsCh <- row:
		return nil
	}
}
func (w *changefeedResultWriter) IncrementRowsAffected(n int) {
	w.rowsAffected += n
}
func (w *changefeedResultWriter) SetError(err error) {
	w.err = err
}
func (w *changefeedResultWriter) Err() error {
	return w.err
}
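
// NB (illustrative summary, no additional behavior): distChangefeedFlow wraps
// resultsCh in a changefeedResultWriter and hands it to the DistSQLReceiver,
// so in the sinkless case every (resolved, topic, key, value) row emitted by
// the ChangeFrontier is copied by AddRow and forwarded on resultsCh to the
// caller; any error recorded via SetError surfaces through resultRows.Err()
// once dsp.Run returns.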