github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/kvfeed/kv_feed.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 // Package kvfeed provides an abstraction to stream kvs to a buffer. 10 // 11 // The kvfeed coordinates performing logical backfills in the face of schema 12 // changes and then running rangefeeds. 13 package kvfeed 14 15 import ( 16 "context" 17 "fmt" 18 19 "github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/changefeedbase" 20 "github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/schemafeed" 21 "github.com/cockroachdb/cockroach/pkg/gossip" 22 "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" 23 "github.com/cockroachdb/cockroach/pkg/kv" 24 "github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord" 25 "github.com/cockroachdb/cockroach/pkg/roachpb" 26 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 27 "github.com/cockroachdb/cockroach/pkg/sql/catalog/lease" 28 "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" 29 "github.com/cockroachdb/cockroach/pkg/util/hlc" 30 "github.com/cockroachdb/cockroach/pkg/util/log" 31 "github.com/cockroachdb/cockroach/pkg/util/mon" 32 "github.com/cockroachdb/cockroach/pkg/util/span" 33 "github.com/cockroachdb/errors" 34 ) 35 36 // Config configures a kvfeed. 
37 type Config struct { 38 Settings *cluster.Settings 39 DB *kv.DB 40 Clock *hlc.Clock 41 Gossip gossip.DeprecatedGossip 42 Spans []roachpb.Span 43 Targets jobspb.ChangefeedTargets 44 Sink EventBufferWriter 45 LeaseMgr *lease.Manager 46 Metrics *Metrics 47 MM *mon.BytesMonitor 48 WithDiff bool 49 SchemaChangeEvents changefeedbase.SchemaChangeEventClass 50 SchemaChangePolicy changefeedbase.SchemaChangePolicy 51 52 // If true, the feed will begin with a dump of data at exactly the 53 // InitialHighWater. This is a peculiar behavior. In general the 54 // InitialHighWater is a point in time at which all data is known to have 55 // been seen. 56 NeedsInitialScan bool 57 58 // InitialHighWater is the timestamp from which new events are guaranteed to 59 // be produced. 60 InitialHighWater hlc.Timestamp 61 } 62 63 // Run will run the kvfeed. The feed runs synchronously and returns an 64 // error when it finishes. 65 func Run(ctx context.Context, cfg Config) error { 66 g := ctxgroup.WithContext(ctx) 67 var sf schemaFeed 68 { 69 rawSF := schemafeed.New(makeTablefeedConfig(cfg)) 70 // Start polling the schemafeed, which must be done concurrently with 71 // the individual rangefeed routines. 
72 g.GoCtx(rawSF.Run) 73 sf = rawSF 74 } 75 var sc kvScanner 76 { 77 sc = &scanRequestScanner{ 78 settings: cfg.Settings, 79 gossip: cfg.Gossip, 80 db: cfg.DB, 81 } 82 } 83 var pff physicalFeedFactory 84 { 85 sender := cfg.DB.NonTransactionalSender() 86 distSender := sender.(*kv.CrossRangeTxnWrapperSender).Wrapped().(*kvcoord.DistSender) 87 pff = rangefeedFactory(distSender.RangeFeed) 88 } 89 bf := func() EventBuffer { 90 return makeMemBuffer(cfg.MM.MakeBoundAccount(), cfg.Metrics) 91 } 92 f := newKVFeed( 93 cfg.Sink, cfg.Spans, 94 cfg.SchemaChangeEvents, cfg.SchemaChangePolicy, 95 cfg.NeedsInitialScan, cfg.WithDiff, 96 cfg.InitialHighWater, 97 sf, sc, pff, bf) 98 g.GoCtx(f.run) 99 err := g.Wait() 100 // NB: The higher layers of the changefeed should detect the boundary and the 101 // policy and tear everything down. Returning before the higher layers tear 102 // down the changefeed exposes synchronization challenges. 103 var scErr schemaChangeDetectedError 104 if errors.As(err, &scErr) { 105 log.Infof(ctx, "stopping changefeed due to schema change at %v", scErr.ts) 106 <-ctx.Done() 107 err = nil 108 } 109 return err 110 } 111 112 // schemaChangeDetectedError is a sentinel error to indicate to Run() that the 113 // schema change is stopping due to a schema change. This is handy to trigger 114 // the context group to stop; the error is handled entirely in this package. 
115 type schemaChangeDetectedError struct { 116 ts hlc.Timestamp 117 } 118 119 func (e schemaChangeDetectedError) Error() string { 120 return fmt.Sprintf("schema change deteceted at %v", e.ts) 121 } 122 123 type schemaFeed interface { 124 Peek(ctx context.Context, atOrBefore hlc.Timestamp) (events []schemafeed.TableEvent, err error) 125 Pop(ctx context.Context, atOrBefore hlc.Timestamp) (events []schemafeed.TableEvent, err error) 126 } 127 128 type kvFeed struct { 129 spans []roachpb.Span 130 withDiff bool 131 withInitialBackfill bool 132 initialHighWater hlc.Timestamp 133 sink EventBufferWriter 134 135 schemaChangeEvents changefeedbase.SchemaChangeEventClass 136 schemaChangePolicy changefeedbase.SchemaChangePolicy 137 138 // These dependencies are made available for test injection. 139 bufferFactory func() EventBuffer 140 tableFeed schemaFeed 141 scanner kvScanner 142 physicalFeed physicalFeedFactory 143 } 144 145 func newKVFeed( 146 sink EventBufferWriter, 147 spans []roachpb.Span, 148 schemaChangeEvents changefeedbase.SchemaChangeEventClass, 149 schemaChangePolicy changefeedbase.SchemaChangePolicy, 150 withInitialBackfill, withDiff bool, 151 initialHighWater hlc.Timestamp, 152 tf schemaFeed, 153 sc kvScanner, 154 pff physicalFeedFactory, 155 bf func() EventBuffer, 156 ) *kvFeed { 157 return &kvFeed{ 158 sink: sink, 159 spans: spans, 160 withInitialBackfill: withInitialBackfill, 161 withDiff: withDiff, 162 initialHighWater: initialHighWater, 163 schemaChangeEvents: schemaChangeEvents, 164 schemaChangePolicy: schemaChangePolicy, 165 tableFeed: tf, 166 scanner: sc, 167 physicalFeed: pff, 168 bufferFactory: bf, 169 } 170 } 171 172 func (f *kvFeed) run(ctx context.Context) (err error) { 173 // highWater represents the point in time at or before which we know 174 // we've seen all events or is the initial starting time of the feed. 
175 highWater := f.initialHighWater 176 for i := 0; ; i++ { 177 initialScan := i == 0 178 if err = f.scanIfShould(ctx, initialScan, highWater); err != nil { 179 return err 180 } 181 highWater, err = f.runUntilTableEvent(ctx, highWater) 182 if err != nil { 183 return err 184 } 185 186 // Resolve all of the spans as a boundary if the policy indicates that 187 // we should do so. 188 if f.schemaChangePolicy != changefeedbase.OptSchemaChangePolicyNoBackfill { 189 for _, span := range f.spans { 190 if err := f.sink.AddResolved(ctx, span, highWater, true); err != nil { 191 return err 192 } 193 } 194 } 195 // Exit if the policy says we should. 196 if f.schemaChangePolicy == changefeedbase.OptSchemaChangePolicyStop { 197 return schemaChangeDetectedError{highWater.Next()} 198 } 199 } 200 } 201 202 func (f *kvFeed) scanIfShould( 203 ctx context.Context, initialScan bool, highWater hlc.Timestamp, 204 ) error { 205 scanTime := highWater.Next() 206 events, err := f.tableFeed.Peek(ctx, scanTime) 207 if err != nil { 208 return err 209 } 210 // This off-by-one is a little weird. It says that if you create a changefeed 211 // at some statement time then you're going to get the table as of that statement 212 // time with an initial backfill but if you use a cursor then you will get the 213 // updates after that timestamp. 214 isInitialScan := initialScan && f.withInitialBackfill 215 if isInitialScan { 216 scanTime = highWater 217 } else if len(events) > 0 { 218 // TODO(ajwerner): In this case we should only backfill for the tables 219 // which have events which may not be all of the targets. 220 for _, ev := range events { 221 if !scanTime.Equal(ev.After.ModificationTime) { 222 log.Fatalf(ctx, "found event in shouldScan which did not occur at the scan time %v: %v", 223 scanTime, ev) 224 } 225 } 226 } else { 227 return nil 228 } 229 230 // Consume the events up to scanTime. 
231 if _, err := f.tableFeed.Pop(ctx, scanTime); err != nil { 232 return err 233 } 234 235 if !isInitialScan && f.schemaChangePolicy == changefeedbase.OptSchemaChangePolicyNoBackfill { 236 return nil 237 } 238 239 if err := f.scanner.Scan(ctx, f.sink, physicalConfig{ 240 Spans: f.spans, 241 Timestamp: scanTime, 242 WithDiff: !isInitialScan && f.withDiff, 243 }); err != nil { 244 return err 245 } 246 247 // NB: We don't update the highwater even though we've technically seen all 248 // events for all spans at the previous highwater.Next(). We choose not to 249 // because doing so would be wrong once we only backfill some tables. 250 return nil 251 } 252 253 func (f *kvFeed) runUntilTableEvent( 254 ctx context.Context, startFrom hlc.Timestamp, 255 ) (resolvedUpTo hlc.Timestamp, err error) { 256 // Determine whether to request the previous value of each update from 257 // RangeFeed based on whether the `diff` option is specified. 258 if _, err := f.tableFeed.Peek(ctx, startFrom); err != nil { 259 return hlc.Timestamp{}, err 260 } 261 262 memBuf := f.bufferFactory() 263 defer memBuf.Close(ctx) 264 265 g := ctxgroup.WithContext(ctx) 266 physicalCfg := physicalConfig{Spans: f.spans, Timestamp: startFrom, WithDiff: f.withDiff} 267 g.GoCtx(func(ctx context.Context) error { 268 return copyFromSourceToSinkUntilTableEvent(ctx, f.sink, memBuf, physicalCfg, f.tableFeed) 269 }) 270 g.GoCtx(func(ctx context.Context) error { 271 return f.physicalFeed.Run(ctx, memBuf, physicalCfg) 272 }) 273 274 // TODO(mrtracy): We are currently tearing down the entire rangefeed set in 275 // order to perform a scan; however, given that we have an intermediate 276 // buffer, its seems that we could do this without having to destroy and 277 // recreate the rangefeeds. 
278 err = g.Wait() 279 if err == nil { 280 log.Fatalf(ctx, "feed exited with no error and no scan boundary") 281 return hlc.Timestamp{}, nil // unreachable 282 } else if tErr := (*errBoundaryReached)(nil); errors.As(err, &tErr) { 283 // TODO(ajwerner): iterate the spans and add a Resolved timestamp. 284 // We'll need to do this to ensure that a resolved timestamp propagates 285 // when we're trying to exit. 286 return tErr.Timestamp().Prev(), nil 287 } else { 288 return hlc.Timestamp{}, err 289 } 290 } 291 292 type errBoundaryReached struct { 293 schemafeed.TableEvent 294 } 295 296 func (e *errBoundaryReached) Error() string { 297 return "scan boundary reached: " + e.String() 298 } 299 300 // copyFromSourceToSinkUntilTableEvents will pull read entries from source and 301 // publish them to sink if there is no table event from the schemaFeed. If a 302 // tableEvent occurs then the function will return once all of the spans have 303 // been resolved up to the event. The first such event will be returned as 304 // *errBoundaryReached. A nil error will never be returned. 305 func copyFromSourceToSinkUntilTableEvent( 306 ctx context.Context, 307 sink EventBufferWriter, 308 source EventBufferReader, 309 cfg physicalConfig, 310 tables schemaFeed, 311 ) error { 312 // Maintain a local spanfrontier to tell when all the component rangefeeds 313 // being watched have reached the Scan boundary. 314 frontier := span.MakeFrontier(cfg.Spans...) 
315 for _, span := range cfg.Spans { 316 frontier.Forward(span, cfg.Timestamp) 317 } 318 var ( 319 scanBoundary *errBoundaryReached 320 checkForScanBoundary = func(ts hlc.Timestamp) error { 321 if scanBoundary != nil { 322 return nil 323 } 324 nextEvents, err := tables.Peek(ctx, ts) 325 if err != nil { 326 return err 327 } 328 if len(nextEvents) > 0 { 329 scanBoundary = &errBoundaryReached{nextEvents[0]} 330 } 331 return nil 332 } 333 applyScanBoundary = func(e Event) (skipEvent, reachedBoundary bool) { 334 if scanBoundary == nil { 335 return false, false 336 } 337 if e.Timestamp().Less(scanBoundary.Timestamp()) { 338 return false, false 339 } 340 switch e.Type() { 341 case KVEvent: 342 return true, false 343 case ResolvedEvent: 344 boundaryResolvedTimestamp := scanBoundary.Timestamp().Prev() 345 resolved := e.Resolved() 346 if resolved.Timestamp.LessEq(boundaryResolvedTimestamp) { 347 return false, false 348 } 349 frontier.Forward(resolved.Span, boundaryResolvedTimestamp) 350 return true, frontier.Frontier() == boundaryResolvedTimestamp 351 default: 352 log.Fatal(ctx, "unknown event type") 353 return false, false 354 } 355 } 356 addEntry = func(e Event) error { 357 switch e.Type() { 358 case KVEvent: 359 return sink.AddKV(ctx, e.KV(), e.PrevValue(), e.BackfillTimestamp()) 360 case ResolvedEvent: 361 // TODO(ajwerner): technically this doesn't need to happen for most 362 // events - we just need to make sure we forward for events which are 363 // at scanBoundary.Prev(). We may not yet know about that scanBoundary. 364 // The logic currently doesn't make this clean. 
365 resolved := e.Resolved() 366 frontier.Forward(resolved.Span, resolved.Timestamp) 367 return sink.AddResolved(ctx, resolved.Span, resolved.Timestamp, false) 368 default: 369 log.Fatal(ctx, "unknown event type") 370 return nil 371 } 372 } 373 ) 374 for { 375 e, err := source.Get(ctx) 376 if err != nil { 377 return err 378 } 379 if err := checkForScanBoundary(e.Timestamp()); err != nil { 380 return err 381 } 382 skipEntry, scanBoundaryReached := applyScanBoundary(e) 383 if scanBoundaryReached { 384 // All component rangefeeds are now at the boundary. 385 // Break out of the ctxgroup by returning the sentinel error. 386 return scanBoundary 387 } 388 if skipEntry { 389 continue 390 } 391 if err := addEntry(e); err != nil { 392 return err 393 } 394 } 395 } 396 397 func makeTablefeedConfig(cfg Config) schemafeed.Config { 398 return schemafeed.Config{ 399 DB: cfg.DB, 400 Clock: cfg.Clock, 401 Settings: cfg.Settings, 402 Targets: cfg.Targets, 403 LeaseManager: cfg.LeaseMgr, 404 SchemaChangeEvents: cfg.SchemaChangeEvents, 405 InitialHighWater: cfg.InitialHighWater, 406 } 407 }