github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvnemesis/watcher.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvnemesis

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/span"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
)

// ClosedTimestampTargetInterval allows for setting the closed timestamp target
// interval.
type ClosedTimestampTargetInterval interface {
	Set(context.Context, time.Duration) error
	ResetToDefault(context.Context) error
}

// Watcher slurps all changes that happen to some span of kvs using RangeFeed.
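// Each RangeFeedValue it receives is ingested into an in-memory Engine, and a
// frontier of resolved timestamps over the watched span is maintained so that
// callers can wait, via WaitForFrontier, until everything at or below a given
// timestamp has been ingested.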
type Watcher struct {
	ct ClosedTimestampTargetInterval
	mu struct {
		syncutil.Mutex
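		// kvs accumulates every key/timestamp/value the RangeFeed delivers.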
		kvs             *Engine
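		// frontier tracks the resolved timestamp across the watched span.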
		frontier        *span.Frontier
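		// frontierWaiters holds channels that are notified (and dropped) once
		// the frontier reaches their timestamp.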
		frontierWaiters map[hlc.Timestamp][]chan error
	}
	cancel func()
	g      ctxgroup.Group
}

// Watch starts a new Watcher over the given span of kvs. See Watcher.
func Watch(
	ctx context.Context, dbs []*kv.DB, ct ClosedTimestampTargetInterval, dataSpan roachpb.Span,
) (*Watcher, error) {
	if len(dbs) < 1 {
		return nil, errors.New(`at least one db must be given`)
	}
	firstDB := dbs[0]

	w := &Watcher{
		ct: ct,
	}
	var err error
	if w.mu.kvs, err = MakeEngine(); err != nil {
		return nil, err
	}
	w.mu.frontier = span.MakeFrontier(dataSpan)
	w.mu.frontierWaiters = make(map[hlc.Timestamp][]chan error)
	ctx, w.cancel = context.WithCancel(ctx)
	w.g = ctxgroup.WithContext(ctx)

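	// Unwrap each DB's non-transactional sender down to its DistSender, which
	// is what exposes the RangeFeed call used below.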
	dss := make([]*kvcoord.DistSender, len(dbs))
	for i := range dbs {
		sender := dbs[i].NonTransactionalSender()
		dss[i] = sender.(*kv.CrossRangeTxnWrapperSender).Wrapped().(*kvcoord.DistSender)
	}

	startTs := firstDB.Clock().Now()
	eventC := make(chan *roachpb.RangeFeedEvent, 128)
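	// Run the RangeFeed in a retry loop, round-robining over the DistSenders
	// and restarting from the current frontier after a retryable error.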
	w.g.GoCtx(func(ctx context.Context) error {
		ts := startTs
		for i := 0; ; i = (i + 1) % len(dbs) {
			w.mu.Lock()
			ts.Forward(w.mu.frontier.Frontier())
			w.mu.Unlock()

			ds := dss[i]
			err := ds.RangeFeed(ctx, dataSpan, ts, true /* withDiff */, eventC)
			if isRetryableRangeFeedErr(err) {
				log.Infof(ctx, "got retryable RangeFeed error: %+v", err)
				continue
			}
			return err
		}
	})
	w.g.GoCtx(func(ctx context.Context) error {
		return w.processEvents(ctx, eventC)
	})

	// Make sure the RangeFeed has started up, else we might lose some events.
	if err := w.WaitForFrontier(ctx, startTs); err != nil {
		_ = w.Finish()
		return nil, err
	}

	return w, nil
}

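// isRetryableRangeFeedErr reports whether the RangeFeed should be restarted
// after the given error. Context cancellation means the Watcher is shutting
// down; everything else is treated as retryable.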
func isRetryableRangeFeedErr(err error) bool {
	switch {
	case errors.Is(err, context.Canceled):
		return false
	default:
		return true
	}
}

// Finish tears down the Watcher and returns all the kvs it has ingested. It may
// be called multiple times, though not concurrently.
func (w *Watcher) Finish() *Engine {
	if w.cancel == nil {
		// Finish was already called.
		return w.mu.kvs
	}
	w.cancel()
	w.cancel = nil
	// Only WaitForFrontier cares about errors.
	_ = w.g.Wait()
	return w.mu.kvs
}

// WaitForFrontier blocks until all kv changes <= the given timestamp are
// guaranteed to have been ingested.
func (w *Watcher) WaitForFrontier(ctx context.Context, ts hlc.Timestamp) (retErr error) {
	log.Infof(ctx, `watcher waiting for %s`, ts)
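	// Shorten the closed timestamp target interval while waiting so that
	// RangeFeed checkpoints, and thus the frontier, advance quickly; the
	// deferred call below restores the default.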
	if err := w.ct.Set(ctx, 1*time.Millisecond); err != nil {
		return err
	}
	defer func() {
		if err := w.ct.ResetToDefault(ctx); err != nil {
			retErr = errors.WithSecondaryError(retErr, err)
		}
	}()
	resultCh := make(chan error, 1)
	w.mu.Lock()
	w.mu.frontierWaiters[ts] = append(w.mu.frontierWaiters[ts], resultCh)
	w.mu.Unlock()
	select {
	case <-ctx.Done():
		return ctx.Err()
	case err := <-resultCh:
		return err
	}
}

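// processEvents ingests RangeFeed events until the context is canceled: values
// are written to the in-memory Engine and cross-checked against the previous
// value reported by the RangeFeed, checkpoints advance the frontier and notify
// any waiters it has passed, and errors tear down the watcher.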
func (w *Watcher) processEvents(ctx context.Context, eventC chan *roachpb.RangeFeedEvent) error {
	for {
		select {
		case <-ctx.Done():
			return nil
		case event := <-eventC:
			switch e := event.GetValue().(type) {
			case *roachpb.RangeFeedError:
				return e.Error.GoError()
			case *roachpb.RangeFeedValue:
				log.Infof(ctx, `rangefeed Put %s %s -> %s (prev %s)`,
					e.Key, e.Value.Timestamp, e.Value.PrettyPrint(), e.PrevValue.PrettyPrint())
				w.mu.Lock()
				// TODO(dan): If the exact key+ts is put into kvs more than once, the
				// Engine will keep the last. This matches our txn semantics (if a key
				// is written in a transaction more than once, only the last is kept)
				// but it means that we won't catch it if we violate those semantics.
				// Consider first doing a Get and somehow failing if this exact key+ts
				// has previously been put with a different value.
				w.mu.kvs.Put(storage.MVCCKey{Key: e.Key, Timestamp: e.Value.Timestamp}, e.Value.RawBytes)
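				// Reconstruct the previous value from our own engine at the
				// timestamp immediately preceding this write so it can be
				// compared against the PrevValue reported by the RangeFeed.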
				prevTs := e.Value.Timestamp.Prev()
				prevValue := w.mu.kvs.Get(e.Key, prevTs)

				// RangeFeed doesn't send back the timestamps of the previous values
				// because changefeeds don't need them. It would likely be easy to
				// implement, but it would add unnecessary allocations for changefeeds,
				// so we'd want to make it an option in the request, which seems silly
				// to do for only this test.
				prevValue.Timestamp = hlc.Timestamp{}
				prevValueMismatch := !prevValue.Equal(e.PrevValue)
				var engineContents string
				if prevValueMismatch {
					engineContents = w.mu.kvs.DebugPrint("  ")
				}
				w.mu.Unlock()

				if prevValueMismatch {
					log.Infof(ctx, "rangefeed mismatch\n%s", engineContents)
					panic(errors.Errorf(
						`expected (%s, %s) previous value %s got: %s`, e.Key, prevTs, prevValue, e.PrevValue))
				}
			case *roachpb.RangeFeedCheckpoint:
				w.mu.Lock()
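				// Forward returns true only if this checkpoint advanced the
				// frontier over the entire watched span; when it does, notify
				// any waiters at or below the new frontier.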
				if w.mu.frontier.Forward(e.Span, e.ResolvedTS) {
					frontier := w.mu.frontier.Frontier()
					log.Infof(ctx, `watcher reached frontier %s lagging by %s`,
						frontier, timeutil.Now().Sub(frontier.GoTime()))
					for ts, chs := range w.mu.frontierWaiters {
						if frontier.Less(ts) {
							continue
						}
						log.Infof(ctx, `watcher notifying %s`, ts)
						delete(w.mu.frontierWaiters, ts)
						for _, ch := range chs {
							ch <- nil
						}
					}
				}
				w.mu.Unlock()
			}
		}
	}
}