github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/closedts/closedts.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Package closedts houses the interfaces and basic definitions used by the
// various components of the closed timestamp subsystem.
//
// The following diagram illustrates how these components fit together. In
// running operation, the components are grouped in a container.Container
// (intended as a pass-around per-instance Singleton).
// Replicas proposing commands talk to the Tracker; replicas trying to serve
// follower reads talk to the Provider, which receives closed timestamp updates
// for the local node and its peers.
//
//                             Node 1 | Node 2
//                                    |
// +---------+  Close  +-----------+  |  +-----------+
// | Tracker |<--------|           |  |  |           |
// +-----+---+         | +-------+ |  |  | +-------+ |  CanServe
//       ^             | |Storage| |  |  | |Storage| |<---------+
//       |             | +-------+ |  |  | +-------+ |          |
//       |Track        |           |  |  |           |     +----+----+
//       |             | Provider  |  |  | Provider  |     | Follower|
//       |             +-----------+  |  +-----------+     | Replica |
//       |                 ^                  ^            +----+----+
//       |                 |Subscribe         |Notify           |
//       |                 |                  |                 |
// +---------+             |      Request     |                 |
// |Proposing| Refresh +---+----+ <------ +---+-----+  Request  |
// | Replica |<--------| Server |         | Clients |<----------+
// +---------+         +--------+ ------> +---------+  EnsureClient
//                                  CT
package closedts

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/ctpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

// ReleaseFunc is a closure returned from Track which is used to record the
// LeaseAppliedIndex (LAI) given to a tracked proposal. The supplied epoch must
// match that of the lease under which the proposal was proposed.
type ReleaseFunc func(context.Context, ctpb.Epoch, roachpb.RangeID, ctpb.LAI)

// TrackerI is part of the machinery enabling follower reads, that is,
// consistent reads served by replicas not holding the lease (for the requested
// timestamp). This data structure keeps tabs on ongoing command evaluations
// (which it forces to successively higher timestamps) and provides closed
// timestamp updates along with a map delta of minimum Lease Applied Indexes
// that a replica wishing to serve a follower read must reach in order to do so
// correctly.
//
// See https://github.com/cockroachdb/cockroach/pull/26362 for more information.
//
// The methods exposed on Tracker are safe for concurrent use.
type TrackerI interface {
	Close(next hlc.Timestamp, expCurEpoch ctpb.Epoch) (hlc.Timestamp, map[roachpb.RangeID]ctpb.LAI, bool)
	Track(ctx context.Context) (hlc.Timestamp, ReleaseFunc)
}

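// The following sketch is not part of the original file; it illustrates, under
// assumed names, how a proposing replica typically uses a TrackerI: Track is
// called before evaluating a command, the returned timestamp bounds the
// proposal's write timestamp from below, and the ReleaseFunc is invoked with
// the lease epoch and the LeaseAppliedIndex assigned to the proposal. The
// epoch, range ID, and LAI values are placeholders.
func trackProposal(
	ctx context.Context, t TrackerI, epoch ctpb.Epoch, rangeID roachpb.RangeID,
) {
	minTS, release := t.Track(ctx)
	_ = minTS // the command must be evaluated at a timestamp >= minTS

	// ... evaluate the command and assign it a LeaseAppliedIndex ...
	assignedLAI := ctpb.LAI(12) // placeholder

	// Releasing records the LAI under which the proposal will apply and stops
	// the Tracker from having to wait for this evaluation before closing out
	// higher timestamps.
	release(ctx, epoch, rangeID, assignedLAI)
}
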
// A Storage holds the closed timestamps and associated MLAIs for each node. It
// additionally provides historical information about past state that it
// "compacts" regularly, and which can be introspected via the VisitAscending
// and VisitDescending methods.
//
// The data in a Storage is ephemeral, i.e. it is lost across process restarts.
// Introducing a persistent storage would require some design work to make
// sure a) that the records in the storage are certifiably up to date (they
// won't be naturally, unless we add a synchronous write to each proposal)
// and b) that the proposal at each MLAI has actually been proposed. It's
// unlikely that we'll ever find it useful to introduce persistence here
// (though we want to persist historical information for recovery after
// permanent loss of quorum, but there we only need some consistent on-
// disk state; we don't need to bootstrap it into a new consistent state
// that can be updated incrementally).
type Storage interface {
	// VisitAscending visits the historical states contained within the Storage
	// in ascending closed timestamp order. Each state (Entry) is full, i.e.
	// non-incremental. The iteration stops when all states have been visited
	// or the visitor returns true.
	VisitAscending(roachpb.NodeID, func(ctpb.Entry) (done bool))
	// VisitDescending visits the historical states contained within the Storage
	// in descending closed timestamp order. Each state (Entry) is full, i.e.
	// non-incremental. The iteration stops when all states have been visited
	// or the visitor returns true.
	VisitDescending(roachpb.NodeID, func(ctpb.Entry) (done bool))
	// Add merges the given Entry into the state for the given NodeID. The first
	// Entry passed in for any given Entry.Epoch must have Entry.Full set.
	Add(roachpb.NodeID, ctpb.Entry)
	// Clear removes all closed timestamp information from the Storage. It can
	// be used to simulate the loss of information caused by a process restart.
	Clear()
}

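// The following sketch is not part of the original file; it shows one way to
// consume VisitAscending: scan a node's historical states in ascending closed
// timestamp order and keep the most recent Entry whose closed timestamp does
// not exceed ts. The Entry field name (ClosedTimestamp) is assumed from ctpb.
func latestEntryNotAfter(s Storage, nodeID roachpb.NodeID, ts hlc.Timestamp) (ctpb.Entry, bool) {
	var best ctpb.Entry
	var found bool
	s.VisitAscending(nodeID, func(e ctpb.Entry) (done bool) {
		if ts.Less(e.ClosedTimestamp) {
			// States are visited in ascending order, so once we pass ts there
			// is nothing left to consider.
			return true
		}
		best, found = e, true
		return false
	})
	return best, found
}
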
// A Notifyee is a sink for closed timestamp updates.
type Notifyee interface {
	// Notify returns a channel into which updates are written.
	//
	// In practice, the Notifyee will be a Provider.
	Notify(roachpb.NodeID) chan<- ctpb.Entry
}

// A Producer is a source of closed timestamp updates about the local node.
type Producer interface {
	// Subscribe blocks and, until the context is canceled, writes a stream of
	// updates to the provided channel, the aggregate of which is guaranteed to
	// represent a valid (i.e. gapless) state.
	Subscribe(context.Context, chan<- ctpb.Entry)
}

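// The following sketch is not part of the original file; it illustrates how a
// Producer and a Notifyee fit together: the Producer's gapless update stream
// for the local node is forwarded into the Notifyee (in practice a Provider)
// under the local node's ID. In the real system this wiring lives elsewhere;
// the function and channel here are illustrative only.
func forwardLocalUpdates(ctx context.Context, p Producer, n Notifyee, localNodeID roachpb.NodeID) {
	updates := make(chan ctpb.Entry)
	sink := n.Notify(localNodeID)
	go func() {
		// Subscribe blocks until ctx is canceled; we own the channel, so we
		// close it once Subscribe returns to end the loop below.
		p.Subscribe(ctx, updates)
		close(updates)
	}()
	for entry := range updates {
		sink <- entry
	}
}
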
// Provider is the central coordinator in the closed timestamp subsystem and the
// gatekeeper for the closed timestamp state for both local and remote nodes,
// which it handles in a symmetric fashion. It has the following tasks:
//
// 1. it accepts subscriptions for closed timestamp updates sourced from the
//    local node. Upon accepting a subscription, the subscriber first receives
//    the aggregate closed timestamp snapshot of the local node and then periodic
//    updates.
// 2. it periodically closes out timestamps on the local node and passes the
//    resulting entries to all of its subscribers.
// 3. it accepts notifications from other nodes, passing these updates through
//    to its local storage, so that
// 4. the CanServe method determines via the underlying storage whether a
//    given read can be satisfied via follower reads.
// 5. the MaxClosed method determines via the underlying storage what the maximum
//    closed timestamp is for the specified LAI.
//    TODO(tschottdorf): This is already adding some cruft to this nice interface.
//    CanServe and MaxClosed are almost identical.
//
// Note that a Provider has no duty to immediately persist the local closed
// timestamps to the underlying storage.
type Provider interface {
	Producer
	Notifyee
	Start()
	MaxClosed(roachpb.NodeID, roachpb.RangeID, ctpb.Epoch, ctpb.LAI) hlc.Timestamp
}

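// The following sketch is not part of the original file; it shows how a
// follower replica might consult a Provider. A read at readTS can be served
// locally if the maximum closed timestamp known for the leaseholder's node,
// the lease epoch, and the replica's applied lease index is at least readTS.
// All parameter names are illustrative.
func canServeFollowerRead(
	p Provider,
	leaseholderNode roachpb.NodeID,
	rangeID roachpb.RangeID,
	leaseEpoch ctpb.Epoch,
	appliedLAI ctpb.LAI,
	readTS hlc.Timestamp,
) bool {
	maxClosed := p.MaxClosed(leaseholderNode, rangeID, leaseEpoch, appliedLAI)
	// Serve the read iff readTS <= maxClosed.
	return !maxClosed.Less(readTS)
}
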
// A ClientRegistry is the client component of the follower reads subsystem. It
// contacts other nodes and requests a continuous stream of closed timestamp
// updates, which it relays to the Provider.
type ClientRegistry interface {
	// Request asynchronously notifies the given node that an update should be
	// emitted for the given range.
	Request(roachpb.NodeID, roachpb.RangeID)
	// EnsureClient instructs the registry to (asynchronously) request a stream
	// of closed timestamp updates from the given node.
	EnsureClient(roachpb.NodeID)
}

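// The following sketch is not part of the original file; it shows the typical
// call pattern against a ClientRegistry before relying on follower reads from
// a remote node: make sure a closed timestamp stream from that node exists and
// ask it to emit an update for the range of interest. Both calls only schedule
// asynchronous work.
func requestUpdateFrom(cr ClientRegistry, nodeID roachpb.NodeID, rangeID roachpb.RangeID) {
	cr.EnsureClient(nodeID)
	cr.Request(nodeID, rangeID)
}
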
// CloseFn is periodically called by Producers to close out new timestamps.
// Outside of tests, it corresponds to (*Tracker).Close; see there for a
// detailed description of the semantics. The final returned boolean indicates
// whether the tracked epoch matched expCurEpoch and thus whether the returned
// information may be used.
type CloseFn func(next hlc.Timestamp, expCurEpoch ctpb.Epoch) (hlc.Timestamp, map[roachpb.RangeID]ctpb.LAI, bool)

// AsCloseFn exposes a TrackerI as a CloseFn.
func AsCloseFn(t TrackerI) CloseFn {
	return func(next hlc.Timestamp, expCurEpoch ctpb.Epoch) (hlc.Timestamp, map[roachpb.RangeID]ctpb.LAI, bool) {
		return t.Close(next, expCurEpoch)
	}
}

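// The following sketch is not part of the original file; it shows how a
// CloseFn (for example one obtained from AsCloseFn) might be invoked for a
// single round of closing out a timestamp, packaging the result as a
// ctpb.Entry for subscribers. The Entry field names (Epoch, ClosedTimestamp,
// MLAI) are assumed from ctpb; a false return means the epoch check failed and
// the result must be discarded.
func closeOnce(closeFn CloseFn, next hlc.Timestamp, curEpoch ctpb.Epoch) (ctpb.Entry, bool) {
	closedTS, mlai, ok := closeFn(next, curEpoch)
	if !ok {
		return ctpb.Entry{}, false
	}
	return ctpb.Entry{
		Epoch:           curEpoch,
		ClosedTimestamp: closedTS,
		MLAI:            mlai,
	}, true
}
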
// LiveClockFn supplies a current HLC timestamp from the local node with the
// extra constraint that the local node is live for the returned timestamp at
// the given epoch. The NodeID is passed in to make this function easier to
// define before the NodeID is known.
type LiveClockFn func(roachpb.NodeID) (liveNow hlc.Timestamp, liveEpoch ctpb.Epoch, _ error)

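// The following sketch is not part of the original file; it is a minimal
// LiveClockFn suitable only for illustration or tests: it returns the current
// clock reading together with a fixed epoch and never consults node liveness.
// A production implementation must verify that the node is actually live for
// the returned timestamp at the returned epoch.
func constantEpochLiveClock(clock *hlc.Clock, epoch ctpb.Epoch) LiveClockFn {
	return func(roachpb.NodeID) (hlc.Timestamp, ctpb.Epoch, error) {
		return clock.Now(), epoch, nil
	}
}
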
// RefreshFn is called by the Producer when it is asked to manually create (and
// emit) an update for a number of its replicas. The closed timestamp subsystem
// intentionally knows as little about the outside world as possible, and this
// function, injected from the outside, provides the minimal glue. Its job is
// to register, with the Tracker, a proposal for the current lease applied
// indexes of the replicas, so that updates for them are emitted soon
// thereafter.
type RefreshFn func(...roachpb.RangeID)

// A Dialer opens closed timestamp connections to receive updates from remote
// nodes.
type Dialer interface {
	Dial(context.Context, roachpb.NodeID) (ctpb.Client, error)
	Ready(roachpb.NodeID) bool // if false, Dial is likely to fail
}
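
// The following sketch is not part of the original file; it shows the intended
// interplay of Ready and Dial: Ready is a cheap check that lets callers skip
// nodes that are likely unreachable, while Dial actually opens the stream.
// What to do with the resulting ctpb.Client is up to the caller and omitted
// here; the helper name and bool return are illustrative only.
func dialIfReady(ctx context.Context, d Dialer, nodeID roachpb.NodeID) (ctpb.Client, bool) {
	if !d.Ready(nodeID) {
		// Likely to fail; let the caller retry later.
		return nil, false
	}
	c, err := d.Dial(ctx, nodeID)
	if err != nil {
		return nil, false
	}
	return c, true
}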