github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/closedts/closedts.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Package closedts houses the interfaces and basic definitions used by the
// various components of the closed timestamp subsystems.
//
// The following diagram illustrates how these components fit together. In
// running operation, the components are grouped in a container.Container
// (intended as a pass-around per-instance Singleton).
// Replicas proposing commands talk to the Tracker; replicas trying to serve
// follower reads talk to the Provider, which receives closed timestamp updates
// for the local node and its peers.
//
//                                 Node 1 | Node 2
//                                        |
// +---------+  Close  +-----------+      |      +-----------+
// | Tracker |<--------|           |      |      |           |
// +-----+---+         | +-------+ |      |      | +-------+ |  CanServe
//       ^             | |Storage| |      |      | |Storage| |<---------+
//       |             | +-------+ |      |      | +-------+ |          |
//       |Track        |           |      |      |           |     +----+----+
//       |             | Provider  |      |      | Provider  |     | Follower|
//       |             +-----------+      |      +-----------+     | Replica |
//       |                 ^                           ^           +----+----+
//       |                 |Subscribe                  |Notify          |
//       |                 |                           |                |
// +---------+             |          Request          |                |
// |Proposing| Refresh +---+----+   <------        +---+-----+  Request |
// | Replica |<--------| Server |                  | Clients |<---------+
// +---------+         +--------+   ------>        +---------+  EnsureClient
//                                     CT
package closedts

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/ctpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

// ReleaseFunc is a closure returned from Track which is used to record the
// LeaseAppliedIndex (LAI) given to a tracked proposal. The supplied epoch must
// match that of the lease under which the proposal was proposed.
type ReleaseFunc func(context.Context, ctpb.Epoch, roachpb.RangeID, ctpb.LAI)

// TrackerI is part of the machinery enabling follower reads, that is, consistent
// reads served by replicas not holding the lease (for the requested timestamp).
// This data structure keeps tabs on ongoing command evaluations (which it
// forces to successively higher timestamps) and provides closed timestamp
// updates along with a map delta of minimum Lease Applied Indexes a replica
// wishing to serve a follower read must reach in order to do so correctly.
//
// See https://github.com/cockroachdb/cockroach/pull/26362 for more information.
//
// The methods exposed on Tracker are safe for concurrent use.
type TrackerI interface {
	Close(next hlc.Timestamp, expCurEpoch ctpb.Epoch) (hlc.Timestamp, map[roachpb.RangeID]ctpb.LAI, bool)
	Track(ctx context.Context) (hlc.Timestamp, ReleaseFunc)
}
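
// exampleTrackProposal is an illustrative sketch (not part of the original
// declarations in this file) of how a proposing replica uses a TrackerI:
// Track returns the minimum timestamp at which the command may be evaluated
// together with a ReleaseFunc, and the ReleaseFunc is called once the
// proposal's LeaseAppliedIndex is known. The function name and its parameters
// stand in for values the real caller would supply.
func exampleTrackProposal(
	ctx context.Context, t TrackerI, epoch ctpb.Epoch, rangeID roachpb.RangeID, lai ctpb.LAI,
) hlc.Timestamp {
	// Register the evaluation with the tracker; the command must not be
	// evaluated below minTS so that already-closed timestamps stay valid.
	minTS, release := t.Track(ctx)
	// ... evaluate the command and learn the LeaseAppliedIndex `lai` it was
	// assigned, under the lease with epoch `epoch` ...
	release(ctx, epoch, rangeID, lai)
	return minTS
}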

// A Storage holds the closed timestamps and associated MLAIs for each node. It
// additionally provides historical information about past state that it
// "compacts" regularly, and which can be introspected via the VisitAscending
// method.
//
// The data in a Storage is ephemeral, i.e. is lost during process restarts.
// Introducing a persistent storage will require some design work to make
// sure a) that the records in the storage are certifiably up to date (they
// won't be naturally, unless we add a synchronous write to each proposal)
// and b) that the proposal at each MLAI has actually been proposed. It's
// unlikely that we'll ever find it useful to introduce persistence here
// (though we want to persist historical information for recovery after
// permanent loss of quorum, but there we only need some consistent on-
// disk state; we don't need to bootstrap it into a new consistent state
// that can be updated incrementally).
type Storage interface {
	// VisitAscending visits the historical states contained within the Storage
	// in ascending closed timestamp order. Each state (Entry) is full, i.e.
	// non-incremental. The iteration stops when all states have been visited
	// or the visitor returns true.
	VisitAscending(roachpb.NodeID, func(ctpb.Entry) (done bool))
	// VisitDescending visits the historical states contained within the Storage
	// in descending closed timestamp order. Each state (Entry) is full, i.e.
	// non-incremental. The iteration stops when all states have been visited
	// or the visitor returns true.
	VisitDescending(roachpb.NodeID, func(ctpb.Entry) (done bool))
	// Add merges the given Entry into the state for the given NodeID. The first
	// Entry passed in for any given Entry.Epoch must have Entry.Full set.
	Add(roachpb.NodeID, ctpb.Entry)
	// Clear removes all closed timestamp information from the Storage. It can
	// be used to simulate the loss of information caused by a process restart.
	Clear()
}

// A Notifyee is a sink for closed timestamp updates.
type Notifyee interface {
	// Notify returns a channel into which updates are written.
	//
	// In practice, the Notifyee will be a Provider.
	Notify(roachpb.NodeID) chan<- ctpb.Entry
}

// A Producer is a source of closed timestamp updates about the local node.
type Producer interface {
	// The Subscribe method blocks and, until the context cancels, writes a
	// stream of updates to the provided channel, the aggregate of which is
	// guaranteed to represent a valid (i.e. gapless) state.
	Subscribe(context.Context, chan<- ctpb.Entry)
}
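
// exampleRelay is an illustrative sketch (not part of the original
// declarations in this file) of how a Producer and a Notifyee compose: the
// local Producer's update stream is forwarded into the Notifyee's channel for
// the given node. In the running system this relaying is done by the transport
// (Server/Clients) and the Provider rather than by a helper like this one. The
// sketch assumes the Producer closes the channel when the subscription ends,
// so that the relay loop terminates.
func exampleRelay(ctx context.Context, p Producer, n Notifyee, nodeID roachpb.NodeID) {
	entries := make(chan ctpb.Entry)
	// Subscribe blocks until the context cancels, so run it asynchronously.
	go p.Subscribe(ctx, entries)
	sink := n.Notify(nodeID)
	for entry := range entries {
		sink <- entry
	}
}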

// Provider is the central coordinator in the closed timestamp subsystem and the
// gatekeeper for the closed timestamp state for both local and remote nodes,
// which it handles in a symmetric fashion. It has the following tasks:
//
// 1. it accepts subscriptions for closed timestamp updates sourced from the
// local node. Upon accepting a subscription, the subscriber first receives
// the aggregate closed timestamp snapshot of the local node and then periodic
// updates.
// 2. it periodically closes out timestamps on the local node and passes the
// resulting entries to all of its subscribers.
// 3. it accepts notifications from other nodes, passing these updates through
// to its local storage, so that
// 4. the CanServe method determines via the underlying storage whether a
// given read can be satisfied via follower reads.
// 5. the MaxClosed method determines via the underlying storage what the maximum
// closed timestamp is for the specified LAI.
//
// TODO(tschottdorf): This is already adding some cruft to this nice interface.
// CanServe and MaxClosed are almost identical.
//
// Note that a Provider has no duty to immediately persist the local closed
// timestamps to the underlying storage.
type Provider interface {
	Producer
	Notifyee
	Start()
	MaxClosed(roachpb.NodeID, roachpb.RangeID, ctpb.Epoch, ctpb.LAI) hlc.Timestamp
}

// A ClientRegistry is the client component of the follower reads subsystem. It
// contacts other nodes and requests a continuous stream of closed timestamp
// updates which it relays to the Provider.
type ClientRegistry interface {
	// Request asynchronously notifies the given node that an update should be
	// emitted for the given range.
	Request(roachpb.NodeID, roachpb.RangeID)
	// EnsureClient instructs the registry to (asynchronously) request a stream
	// of closed timestamp updates from the given node.
	EnsureClient(roachpb.NodeID)
}

// CloseFn is periodically called by Producers to close out new timestamps.
// Outside of tests, it corresponds to (*Tracker).Close; see there for a
// detailed description of the semantics. The final returned boolean indicates
// whether the tracked epoch matched expCurEpoch and thus whether the returned
// information may be used.
type CloseFn func(next hlc.Timestamp, expCurEpoch ctpb.Epoch) (hlc.Timestamp, map[roachpb.RangeID]ctpb.LAI, bool)

// AsCloseFn uses the TrackerI as a CloseFn.
func AsCloseFn(t TrackerI) CloseFn {
	return func(next hlc.Timestamp, expCurEpoch ctpb.Epoch) (hlc.Timestamp, map[roachpb.RangeID]ctpb.LAI, bool) {
		return t.Close(next, expCurEpoch)
	}
}

// LiveClockFn supplies a current HLC timestamp from the local node with the
// extra constraint that the local node is live for the returned timestamp at
// the returned epoch. The NodeID is passed in to make this method easier to
// define before the NodeID is known.
type LiveClockFn func(roachpb.NodeID) (liveNow hlc.Timestamp, liveEpoch ctpb.Epoch, _ error)

// RefreshFn is called by the Producer when it is asked to manually create (and
// emit) an update for a number of its replicas. The closed timestamp subsystem
// intentionally knows as little about the outside world as possible, and this
// function, injected from the outside, provides the minimal glue. Its job is
// to register a proposal for the current lease applied indexes of the replicas
// with the Tracker, so that updates for them are emitted soon thereafter.
type RefreshFn func(...roachpb.RangeID)

// A Dialer opens closed timestamp connections to receive updates from remote
// nodes.
type Dialer interface {
	Dial(context.Context, roachpb.NodeID) (ctpb.Client, error)
	Ready(roachpb.NodeID) bool // if false, Dial is likely to fail
}
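
// exampleClose is an illustrative sketch (not part of the original
// declarations in this file) of how the producer-side pieces fit together: a
// LiveClockFn supplies a timestamp at which the local node is known to be live
// under some epoch, and a CloseFn (for example one obtained via AsCloseFn)
// attempts to close out that timestamp. The returned map holds, per range, the
// minimum Lease Applied Index a follower must have caught up to before serving
// reads below the closed timestamp. The function name and its error handling
// are placeholders.
func exampleClose(
	nodeID roachpb.NodeID, liveClock LiveClockFn, closeFn CloseFn,
) (hlc.Timestamp, map[roachpb.RangeID]ctpb.LAI, error) {
	now, epoch, err := liveClock(nodeID)
	if err != nil {
		return hlc.Timestamp{}, nil, err
	}
	// Ask the tracker to close out `now`, expecting it to still be working
	// under `epoch`. If the epochs don't match, the returned data must be
	// discarded.
	closedTS, mlai, ok := closeFn(now, epoch)
	if !ok {
		return hlc.Timestamp{}, nil, nil
	}
	return closedTS, mlai, nil
}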