github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/kvfeed/kv_feed.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  // Package kvfeed provides an abstraction to stream kvs to a buffer.
    10  //
    11  // The kvfeed coordinates performing logical backfills in the face of schema
    12  // changes and then running rangefeeds.
    13  package kvfeed
    14  
    15  import (
    16  	"context"
    17  	"fmt"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/changefeedbase"
    20  	"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/schemafeed"
    21  	"github.com/cockroachdb/cockroach/pkg/gossip"
    22  	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
    23  	"github.com/cockroachdb/cockroach/pkg/kv"
    24  	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
    25  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    26  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease"
    28  	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
    29  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    30  	"github.com/cockroachdb/cockroach/pkg/util/log"
    31  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    32  	"github.com/cockroachdb/cockroach/pkg/util/span"
    33  	"github.com/cockroachdb/errors"
    34  )
    35  
// Config configures a kvfeed.
//
// Run consumes the fields as follows:
//   - Settings, Gossip and DB are handed to the scanner used for backfill
//     scans; DB is also used to obtain the DistSender whose RangeFeed method
//     backs the physical feed.
//   - DB, Clock, Settings, Targets, LeaseMgr, SchemaChangeEvents and
//     InitialHighWater are forwarded to the schemafeed configuration.
//   - Spans are the spans that are scanned, watched and resolved.
//   - Sink receives the KVs and resolved timestamps produced by the feed.
//   - MM and Metrics are used to construct the intermediate memory buffer
//     that sits between the physical feed and Sink.
//   - WithDiff is passed through to the physical feed configuration; it is
//     not applied to the initial scan.
//   - SchemaChangePolicy controls what happens once a table event is reached:
//     whether the spans are resolved as a boundary, whether a backfill scan
//     is performed, and whether the feed stops.
type Config struct {
	Settings           *cluster.Settings
	DB                 *kv.DB
	Clock              *hlc.Clock
	Gossip             gossip.DeprecatedGossip
	Spans              []roachpb.Span
	Targets            jobspb.ChangefeedTargets
	Sink               EventBufferWriter
	LeaseMgr           *lease.Manager
	Metrics            *Metrics
	MM                 *mon.BytesMonitor
	WithDiff           bool
	SchemaChangeEvents changefeedbase.SchemaChangeEventClass
	SchemaChangePolicy changefeedbase.SchemaChangePolicy

	// If true, the feed will begin with a dump of data at exactly the
	// InitialHighWater. This is a peculiar behavior. In general the
	// InitialHighWater is a point in time at which all data is known to have
	// been seen.
	NeedsInitialScan bool

	// InitialHighWater is the timestamp from which new events are guaranteed to
	// be produced.
	InitialHighWater hlc.Timestamp
}
    62  
    63  // Run will run the kvfeed. The feed runs synchronously and returns an
    64  // error when it finishes.
    65  func Run(ctx context.Context, cfg Config) error {
    66  	g := ctxgroup.WithContext(ctx)
    67  	var sf schemaFeed
    68  	{
    69  		rawSF := schemafeed.New(makeTablefeedConfig(cfg))
    70  		// Start polling the schemafeed, which must be done concurrently with
    71  		// the individual rangefeed routines.
    72  		g.GoCtx(rawSF.Run)
    73  		sf = rawSF
    74  	}
    75  	var sc kvScanner
    76  	{
    77  		sc = &scanRequestScanner{
    78  			settings: cfg.Settings,
    79  			gossip:   cfg.Gossip,
    80  			db:       cfg.DB,
    81  		}
    82  	}
    83  	var pff physicalFeedFactory
    84  	{
    85  		sender := cfg.DB.NonTransactionalSender()
    86  		distSender := sender.(*kv.CrossRangeTxnWrapperSender).Wrapped().(*kvcoord.DistSender)
    87  		pff = rangefeedFactory(distSender.RangeFeed)
    88  	}
    89  	bf := func() EventBuffer {
    90  		return makeMemBuffer(cfg.MM.MakeBoundAccount(), cfg.Metrics)
    91  	}
    92  	f := newKVFeed(
    93  		cfg.Sink, cfg.Spans,
    94  		cfg.SchemaChangeEvents, cfg.SchemaChangePolicy,
    95  		cfg.NeedsInitialScan, cfg.WithDiff,
    96  		cfg.InitialHighWater,
    97  		sf, sc, pff, bf)
    98  	g.GoCtx(f.run)
    99  	err := g.Wait()
   100  	// NB: The higher layers of the changefeed should detect the boundary and the
   101  	// policy and tear everything down. Returning before the higher layers tear
   102  	// down the changefeed exposes synchronization challenges.
   103  	var scErr schemaChangeDetectedError
   104  	if errors.As(err, &scErr) {
   105  		log.Infof(ctx, "stopping changefeed due to schema change at %v", scErr.ts)
   106  		<-ctx.Done()
   107  		err = nil
   108  	}
   109  	return err
   110  }
   111  
   112  // schemaChangeDetectedError is a sentinel error to indicate to Run() that the
   113  // schema change is stopping due to a schema change. This is handy to trigger
   114  // the context group to stop; the error is handled entirely in this package.
   115  type schemaChangeDetectedError struct {
   116  	ts hlc.Timestamp
   117  }
   118  
   119  func (e schemaChangeDetectedError) Error() string {
   120  	return fmt.Sprintf("schema change deteceted at %v", e.ts)
   121  }
   122  
// schemaFeed is the subset of the schemafeed used by the kvFeed to learn
// about table events. It is an interface so that tests can inject their own
// implementation.
//
// Both methods take a timestamp and return table events together with an
// error. The kvFeed uses Peek to inspect events without removing them and Pop
// to consume the events up to a timestamp once they have been handled.
// NOTE(review): the precise blocking and ordering semantics are defined by
// the schemafeed package and are not visible from here.
type schemaFeed interface {
	Peek(ctx context.Context, atOrBefore hlc.Timestamp) (events []schemafeed.TableEvent, err error)
	Pop(ctx context.Context, atOrBefore hlc.Timestamp) (events []schemafeed.TableEvent, err error)
}
   127  
// kvFeed holds the state for the main feed loop (see run), which alternates
// between optionally scanning the spans and running the physical feed until
// the schema feed reports a table event.
//
// spans are the spans scanned, watched and resolved. withDiff is passed to
// the physical feed configuration. withInitialBackfill requests a scan at
// initialHighWater on the first loop iteration. sink receives all KVs and
// resolved timestamps. schemaChangePolicy decides whether boundaries are
// resolved, whether backfills run, and whether the feed stops after a table
// event.
type kvFeed struct {
	spans               []roachpb.Span
	withDiff            bool
	withInitialBackfill bool
	initialHighWater    hlc.Timestamp
	sink                EventBufferWriter

	schemaChangeEvents changefeedbase.SchemaChangeEventClass
	schemaChangePolicy changefeedbase.SchemaChangePolicy

	// These dependencies are made available for test injection.
	bufferFactory func() EventBuffer
	tableFeed     schemaFeed
	scanner       kvScanner
	physicalFeed  physicalFeedFactory
}
   144  
   145  func newKVFeed(
   146  	sink EventBufferWriter,
   147  	spans []roachpb.Span,
   148  	schemaChangeEvents changefeedbase.SchemaChangeEventClass,
   149  	schemaChangePolicy changefeedbase.SchemaChangePolicy,
   150  	withInitialBackfill, withDiff bool,
   151  	initialHighWater hlc.Timestamp,
   152  	tf schemaFeed,
   153  	sc kvScanner,
   154  	pff physicalFeedFactory,
   155  	bf func() EventBuffer,
   156  ) *kvFeed {
   157  	return &kvFeed{
   158  		sink:                sink,
   159  		spans:               spans,
   160  		withInitialBackfill: withInitialBackfill,
   161  		withDiff:            withDiff,
   162  		initialHighWater:    initialHighWater,
   163  		schemaChangeEvents:  schemaChangeEvents,
   164  		schemaChangePolicy:  schemaChangePolicy,
   165  		tableFeed:           tf,
   166  		scanner:             sc,
   167  		physicalFeed:        pff,
   168  		bufferFactory:       bf,
   169  	}
   170  }
   171  
   172  func (f *kvFeed) run(ctx context.Context) (err error) {
   173  	// highWater represents the point in time at or before which we know
   174  	// we've seen all events or is the initial starting time of the feed.
   175  	highWater := f.initialHighWater
   176  	for i := 0; ; i++ {
   177  		initialScan := i == 0
   178  		if err = f.scanIfShould(ctx, initialScan, highWater); err != nil {
   179  			return err
   180  		}
   181  		highWater, err = f.runUntilTableEvent(ctx, highWater)
   182  		if err != nil {
   183  			return err
   184  		}
   185  
   186  		// Resolve all of the spans as a boundary if the policy indicates that
   187  		// we should do so.
   188  		if f.schemaChangePolicy != changefeedbase.OptSchemaChangePolicyNoBackfill {
   189  			for _, span := range f.spans {
   190  				if err := f.sink.AddResolved(ctx, span, highWater, true); err != nil {
   191  					return err
   192  				}
   193  			}
   194  		}
   195  		// Exit if the policy says we should.
   196  		if f.schemaChangePolicy == changefeedbase.OptSchemaChangePolicyStop {
   197  			return schemaChangeDetectedError{highWater.Next()}
   198  		}
   199  	}
   200  }
   201  
   202  func (f *kvFeed) scanIfShould(
   203  	ctx context.Context, initialScan bool, highWater hlc.Timestamp,
   204  ) error {
   205  	scanTime := highWater.Next()
   206  	events, err := f.tableFeed.Peek(ctx, scanTime)
   207  	if err != nil {
   208  		return err
   209  	}
   210  	// This off-by-one is a little weird. It says that if you create a changefeed
   211  	// at some statement time then you're going to get the table as of that statement
   212  	// time with an initial backfill but if you use a cursor then you will get the
   213  	// updates after that timestamp.
   214  	isInitialScan := initialScan && f.withInitialBackfill
   215  	if isInitialScan {
   216  		scanTime = highWater
   217  	} else if len(events) > 0 {
   218  		// TODO(ajwerner): In this case we should only backfill for the tables
   219  		// which have events which may not be all of the targets.
   220  		for _, ev := range events {
   221  			if !scanTime.Equal(ev.After.ModificationTime) {
   222  				log.Fatalf(ctx, "found event in shouldScan which did not occur at the scan time %v: %v",
   223  					scanTime, ev)
   224  			}
   225  		}
   226  	} else {
   227  		return nil
   228  	}
   229  
   230  	// Consume the events up to scanTime.
   231  	if _, err := f.tableFeed.Pop(ctx, scanTime); err != nil {
   232  		return err
   233  	}
   234  
   235  	if !isInitialScan && f.schemaChangePolicy == changefeedbase.OptSchemaChangePolicyNoBackfill {
   236  		return nil
   237  	}
   238  
   239  	if err := f.scanner.Scan(ctx, f.sink, physicalConfig{
   240  		Spans:     f.spans,
   241  		Timestamp: scanTime,
   242  		WithDiff:  !isInitialScan && f.withDiff,
   243  	}); err != nil {
   244  		return err
   245  	}
   246  
   247  	// NB: We don't update the highwater even though we've technically seen all
   248  	// events for all spans at the previous highwater.Next(). We choose not to
   249  	// because doing so would be wrong once we only backfill some tables.
   250  	return nil
   251  }
   252  
   253  func (f *kvFeed) runUntilTableEvent(
   254  	ctx context.Context, startFrom hlc.Timestamp,
   255  ) (resolvedUpTo hlc.Timestamp, err error) {
   256  	// Determine whether to request the previous value of each update from
   257  	// RangeFeed based on whether the `diff` option is specified.
   258  	if _, err := f.tableFeed.Peek(ctx, startFrom); err != nil {
   259  		return hlc.Timestamp{}, err
   260  	}
   261  
   262  	memBuf := f.bufferFactory()
   263  	defer memBuf.Close(ctx)
   264  
   265  	g := ctxgroup.WithContext(ctx)
   266  	physicalCfg := physicalConfig{Spans: f.spans, Timestamp: startFrom, WithDiff: f.withDiff}
   267  	g.GoCtx(func(ctx context.Context) error {
   268  		return copyFromSourceToSinkUntilTableEvent(ctx, f.sink, memBuf, physicalCfg, f.tableFeed)
   269  	})
   270  	g.GoCtx(func(ctx context.Context) error {
   271  		return f.physicalFeed.Run(ctx, memBuf, physicalCfg)
   272  	})
   273  
   274  	// TODO(mrtracy): We are currently tearing down the entire rangefeed set in
   275  	// order to perform a scan; however, given that we have an intermediate
   276  	// buffer, its seems that we could do this without having to destroy and
   277  	// recreate the rangefeeds.
   278  	err = g.Wait()
   279  	if err == nil {
   280  		log.Fatalf(ctx, "feed exited with no error and no scan boundary")
   281  		return hlc.Timestamp{}, nil // unreachable
   282  	} else if tErr := (*errBoundaryReached)(nil); errors.As(err, &tErr) {
   283  		// TODO(ajwerner): iterate the spans and add a Resolved timestamp.
   284  		// We'll need to do this to ensure that a resolved timestamp propagates
   285  		// when we're trying to exit.
   286  		return tErr.Timestamp().Prev(), nil
   287  	} else {
   288  		return hlc.Timestamp{}, err
   289  	}
   290  }
   291  
   292  type errBoundaryReached struct {
   293  	schemafeed.TableEvent
   294  }
   295  
   296  func (e *errBoundaryReached) Error() string {
   297  	return "scan boundary reached: " + e.String()
   298  }
   299  
// copyFromSourceToSinkUntilTableEvent will pull read entries from source and
// publish them to sink if there is no table event from the schemaFeed. If a
// tableEvent occurs then the function will return once all of the spans have
// been resolved up to the event. The first such event will be returned as
// *errBoundaryReached. A nil error will never be returned.
//
// cfg supplies the spans being watched and the timestamp at which the local
// frontier starts. Errors from source.Get, tables.Peek and the sink are
// returned unchanged.
func copyFromSourceToSinkUntilTableEvent(
	ctx context.Context,
	sink EventBufferWriter,
	source EventBufferReader,
	cfg physicalConfig,
	tables schemaFeed,
) error {
	// Maintain a local spanfrontier to tell when all the component rangefeeds
	// being watched have reached the Scan boundary.
	frontier := span.MakeFrontier(cfg.Spans...)
	for _, span := range cfg.Spans {
		frontier.Forward(span, cfg.Timestamp)
	}
	var (
		// scanBoundary is nil until the schema feed reports a table event; once
		// set it is never replaced and is the error eventually returned.
		scanBoundary         *errBoundaryReached
		checkForScanBoundary = func(ts hlc.Timestamp) error {
			// Once a boundary is known there is no need to consult the schema
			// feed again.
			if scanBoundary != nil {
				return nil
			}
			nextEvents, err := tables.Peek(ctx, ts)
			if err != nil {
				return err
			}
			// Only the first event returned by Peek is kept as the boundary.
			if len(nextEvents) > 0 {
				scanBoundary = &errBoundaryReached{nextEvents[0]}
			}
			return nil
		}
		applyScanBoundary = func(e Event) (skipEvent, reachedBoundary bool) {
			// Without a known boundary, or for events strictly before it,
			// nothing is filtered.
			if scanBoundary == nil {
				return false, false
			}
			if e.Timestamp().Less(scanBoundary.Timestamp()) {
				return false, false
			}
			switch e.Type() {
			case KVEvent:
				// KVs at or after the boundary are not forwarded to the sink.
				return true, false
			case ResolvedEvent:
				boundaryResolvedTimestamp := scanBoundary.Timestamp().Prev()
				resolved := e.Resolved()
				if resolved.Timestamp.LessEq(boundaryResolvedTimestamp) {
					return false, false
				}
				// The span has resolved past the boundary: record it in the
				// frontier as resolved only up to just before the boundary and
				// drop the event. The boundary is reached once the frontier as
				// a whole sits at that timestamp.
				frontier.Forward(resolved.Span, boundaryResolvedTimestamp)
				return true, frontier.Frontier() == boundaryResolvedTimestamp
			default:
				log.Fatal(ctx, "unknown event type")
				return false, false
			}
		}
		addEntry = func(e Event) error {
			// Forward the event to the sink, keeping the local frontier in sync
			// for resolved events.
			switch e.Type() {
			case KVEvent:
				return sink.AddKV(ctx, e.KV(), e.PrevValue(), e.BackfillTimestamp())
			case ResolvedEvent:
				// TODO(ajwerner): technically this doesn't need to happen for most
				// events - we just need to make sure we forward for events which are
				// at scanBoundary.Prev(). We may not yet know about that scanBoundary.
				// The logic currently doesn't make this clean.
				resolved := e.Resolved()
				frontier.Forward(resolved.Span, resolved.Timestamp)
				return sink.AddResolved(ctx, resolved.Span, resolved.Timestamp, false)
			default:
				log.Fatal(ctx, "unknown event type")
				return nil
			}
		}
	)
	// The loop only exits by returning an error: either a real failure or the
	// scanBoundary sentinel.
	for {
		e, err := source.Get(ctx)
		if err != nil {
			return err
		}
		if err := checkForScanBoundary(e.Timestamp()); err != nil {
			return err
		}
		skipEntry, scanBoundaryReached := applyScanBoundary(e)
		if scanBoundaryReached {
			// All component rangefeeds are now at the boundary.
			// Break out of the ctxgroup by returning the sentinel error.
			return scanBoundary
		}
		if skipEntry {
			continue
		}
		if err := addEntry(e); err != nil {
			return err
		}
	}
}
   396  
   397  func makeTablefeedConfig(cfg Config) schemafeed.Config {
   398  	return schemafeed.Config{
   399  		DB:                 cfg.DB,
   400  		Clock:              cfg.Clock,
   401  		Settings:           cfg.Settings,
   402  		Targets:            cfg.Targets,
   403  		LeaseManager:       cfg.LeaseMgr,
   404  		SchemaChangeEvents: cfg.SchemaChangeEvents,
   405  		InitialHighWater:   cfg.InitialHighWater,
   406  	}
   407  }