github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvnemesis/watcher.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvnemesis

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/span"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
)

// ClosedTimestampTargetInterval allows for setting the closed timestamp target
// interval.
type ClosedTimestampTargetInterval interface {
	Set(context.Context, time.Duration) error
	ResetToDefault(context.Context) error
}

// Watcher slurps all changes that happen to some span of kvs using RangeFeed.
type Watcher struct {
	ct ClosedTimestampTargetInterval
	mu struct {
		syncutil.Mutex
		kvs             *Engine
		frontier        *span.Frontier
		frontierWaiters map[hlc.Timestamp][]chan error
	}
	cancel func()
	g      ctxgroup.Group
}

// Watch starts a new Watcher over the given span of kvs. See Watcher.
func Watch(
	ctx context.Context, dbs []*kv.DB, ct ClosedTimestampTargetInterval, dataSpan roachpb.Span,
) (*Watcher, error) {
	if len(dbs) < 1 {
		return nil, errors.New(`at least one db must be given`)
	}
	firstDB := dbs[0]

	w := &Watcher{
		ct: ct,
	}
	var err error
	if w.mu.kvs, err = MakeEngine(); err != nil {
		return nil, err
	}
	w.mu.frontier = span.MakeFrontier(dataSpan)
	w.mu.frontierWaiters = make(map[hlc.Timestamp][]chan error)
	ctx, w.cancel = context.WithCancel(ctx)
	w.g = ctxgroup.WithContext(ctx)

	dss := make([]*kvcoord.DistSender, len(dbs))
	for i := range dbs {
		sender := dbs[i].NonTransactionalSender()
		dss[i] = sender.(*kv.CrossRangeTxnWrapperSender).Wrapped().(*kvcoord.DistSender)
	}

	startTs := firstDB.Clock().Now()
	eventC := make(chan *roachpb.RangeFeedEvent, 128)
	w.g.GoCtx(func(ctx context.Context) error {
		ts := startTs
		for i := 0; ; i = (i + 1) % len(dbs) {
			w.mu.Lock()
			ts.Forward(w.mu.frontier.Frontier())
			w.mu.Unlock()

			ds := dss[i]
			err := ds.RangeFeed(ctx, dataSpan, ts, true /* withDiff */, eventC)
			if isRetryableRangeFeedErr(err) {
				log.Infof(ctx, "got retryable RangeFeed error: %+v", err)
				continue
			}
			return err
		}
	})
	w.g.GoCtx(func(ctx context.Context) error {
		return w.processEvents(ctx, eventC)
	})

	// Make sure the RangeFeed has started up, else we might lose some events.
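	// Waiting until the frontier reaches startTs requires a resolved timestamp
	// from every range in dataSpan, which can only happen once the RangeFeed is
	// registered on all of them.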
	if err := w.WaitForFrontier(ctx, startTs); err != nil {
		_ = w.Finish()
		return nil, err
	}

	return w, nil
}

// isRetryableRangeFeedErr returns false only for context cancellation; any
// other RangeFeed error is treated as retryable and the feed is restarted
// from the current frontier.
func isRetryableRangeFeedErr(err error) bool {
	switch {
	case errors.Is(err, context.Canceled):
		return false
	default:
		return true
	}
}

// Finish tears down the Watcher and returns all the kvs it has ingested. It
// may be called multiple times, though not concurrently.
func (w *Watcher) Finish() *Engine {
	if w.cancel == nil {
		// Finish was already called.
		return w.mu.kvs
	}
	w.cancel()
	w.cancel = nil
	// Only WaitForFrontier cares about errors.
	_ = w.g.Wait()
	return w.mu.kvs
}

// WaitForFrontier blocks until all kv changes <= the given timestamp are
// guaranteed to have been ingested.
func (w *Watcher) WaitForFrontier(ctx context.Context, ts hlc.Timestamp) (retErr error) {
	log.Infof(ctx, `watcher waiting for %s`, ts)
	if err := w.ct.Set(ctx, 1*time.Millisecond); err != nil {
		return err
	}
	defer func() {
		if err := w.ct.ResetToDefault(ctx); err != nil {
			retErr = errors.WithSecondaryError(retErr, err)
		}
	}()
	resultCh := make(chan error, 1)
	w.mu.Lock()
	w.mu.frontierWaiters[ts] = append(w.mu.frontierWaiters[ts], resultCh)
	w.mu.Unlock()
	select {
	case <-ctx.Done():
		return ctx.Err()
	case err := <-resultCh:
		return err
	}
}

// processEvents ingests RangeFeed events into the Engine, forwards the
// frontier on checkpoints, and notifies any WaitForFrontier callers whose
// timestamps have been reached.
func (w *Watcher) processEvents(ctx context.Context, eventC chan *roachpb.RangeFeedEvent) error {
	for {
		select {
		case <-ctx.Done():
			return nil
		case event := <-eventC:
			switch e := event.GetValue().(type) {
			case *roachpb.RangeFeedError:
				return e.Error.GoError()
			case *roachpb.RangeFeedValue:
				log.Infof(ctx, `rangefeed Put %s %s -> %s (prev %s)`,
					e.Key, e.Value.Timestamp, e.Value.PrettyPrint(), e.PrevValue.PrettyPrint())
				w.mu.Lock()
				// TODO(dan): If the exact key+ts is put into kvs more than once, the
				// Engine will keep the last. This matches our txn semantics (if a key
				// is written in a transaction more than once, only the last is kept),
				// but it means that we won't catch it if we violate those semantics.
				// Consider first doing a Get and somehow failing if this exact key+ts
				// has previously been put with a different value.
				w.mu.kvs.Put(storage.MVCCKey{Key: e.Key, Timestamp: e.Value.Timestamp}, e.Value.RawBytes)
				prevTs := e.Value.Timestamp.Prev()
				prevValue := w.mu.kvs.Get(e.Key, prevTs)

				// RangeFeed doesn't send back the timestamps of the previous values
				// because changefeeds don't need them. It would likely be easy to
				// implement, but it would add unnecessary allocations in changefeeds,
				// so we'd want to make it an option in the request, which seems silly
				// to do for only this test.
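				// Because the previous value's timestamp can't be checked, clear the
				// timestamp on the locally reconstructed previous value so the Equal
				// comparison below considers only the value bytes.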
				prevValue.Timestamp = hlc.Timestamp{}
				prevValueMismatch := !prevValue.Equal(e.PrevValue)
				var engineContents string
				if prevValueMismatch {
					engineContents = w.mu.kvs.DebugPrint(" ")
				}
				w.mu.Unlock()

				if prevValueMismatch {
					log.Infof(ctx, "rangefeed mismatch\n%s", engineContents)
					panic(errors.Errorf(
						`expected (%s, %s) previous value %s got: %s`, e.Key, prevTs, prevValue, e.PrevValue))
				}
			case *roachpb.RangeFeedCheckpoint:
				w.mu.Lock()
				if w.mu.frontier.Forward(e.Span, e.ResolvedTS) {
					frontier := w.mu.frontier.Frontier()
					log.Infof(ctx, `watcher reached frontier %s lagging by %s`,
						frontier, timeutil.Now().Sub(frontier.GoTime()))
					for ts, chs := range w.mu.frontierWaiters {
						if frontier.Less(ts) {
							continue
						}
						log.Infof(ctx, `watcher notifying %s`, ts)
						delete(w.mu.frontierWaiters, ts)
						for _, ch := range chs {
							ch <- nil
						}
					}
				}
				w.mu.Unlock()
			}
		}
	}
}
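
// The sketch below is not part of the original file; it's an illustrative
// example of how the API above is intended to be driven by a test: Watch a
// span, run a workload, wait for everything written so far to be ingested,
// then Finish to get the ingested kvs back. The function name and parameters
// are hypothetical.
func exampleWatcherUsage(
	ctx context.Context, db *kv.DB, ct ClosedTimestampTargetInterval, dataSpan roachpb.Span,
) (*Engine, error) {
	// Watch blocks until the RangeFeed's frontier has reached the timestamp at
	// which it was started, so no events in dataSpan can be missed after this
	// returns.
	w, err := Watch(ctx, []*kv.DB{db}, ct, dataSpan)
	if err != nil {
		return nil, err
	}

	// ... run some workload against db here ...

	// Wait until every change at or below the current clock timestamp has been
	// ingested, then tear the Watcher down and return the captured kvs.
	if err := w.WaitForFrontier(ctx, db.Clock().Now()); err != nil {
		_ = w.Finish()
		return nil, err
	}
	return w.Finish(), nil
}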