github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/closedts/provider/provider.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package provider

import (
	"context"
	"math"
	"sync"
	"time"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/ctpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/logtags"
)

// Config holds the information necessary to create a Provider.
type Config struct {
	// NodeID is the ID of the node on which the Provider is housed.
	NodeID roachpb.NodeID
	// Settings provides the cluster settings from which the closed timestamp
	// target duration and close fraction are read.
	Settings *cluster.Settings
	// Stopper is used to run and stop the Provider's worker goroutines.
	Stopper *stop.Stopper
	// Storage holds the closed timestamp entries received for each node.
	Storage closedts.Storage
	// Clock returns the current timestamp and the liveness epoch of the
	// local node.
	Clock closedts.LiveClockFn
	// Close attempts to close out a new timestamp at the given liveness
	// epoch, returning the closed timestamp and the MLAIs to publish
	// alongside it.
	Close closedts.CloseFn
}

// subscriber is a registered consumer of the local node's closed timestamp
// entries. The notifier appends new entries to queue (under the lock), and
// the subscriber's goroutine drains the queue and sends the entries on ch.
type subscriber struct {
	ch    chan<- ctpb.Entry
	queue []ctpb.Entry
}

// Provider implements closedts.Provider. It orchestrates the flow of closed
// timestamps and lets callers check whether they can serve reads.
type Provider struct {
	cfg *Config

	mu struct {
		syncutil.RWMutex
		*sync.Cond // on RWMutex.RLocker()
		// The current subscribers. The goroutine associated with each
		// subscriber uses the RLock to mutate its slot. Thus, when
		// accessing this slice for any other reason, the write lock
		// needs to be acquired.
		subscribers []*subscriber
		draining    bool // tell subscribers to terminate
	}

	everyClockLog log.EveryN
}

var _ closedts.Provider = (*Provider)(nil)

// NewProvider initializes a Provider that has yet to be started.
func NewProvider(cfg *Config) *Provider {
	p := &Provider{
		cfg:           cfg,
		everyClockLog: log.Every(time.Minute),
	}
	p.mu.Cond = sync.NewCond(p.mu.RLocker())
	return p
}

// Start implements closedts.Provider.
//
// TODO(tschottdorf): the closer functionality could be extracted into its own
// component, which would make the interfaces a little cleaner. Decide whether
// it's worth it during testing.
func (p *Provider) Start() {
	p.cfg.Stopper.RunWorker(logtags.AddTag(context.Background(), "ct-closer", nil), p.runCloser)
}
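
// The sketch below illustrates, under assumptions, how the pieces above are
// wired together at node startup: the caller assembles a Config from
// components created elsewhere (the Storage, LiveClockFn, and CloseFn
// implementations are not defined in this file), constructs the Provider,
// and starts its closer goroutine. This is an illustrative example only and
// is not called from anywhere.
func exampleStartProvider(
	nodeID roachpb.NodeID,
	st *cluster.Settings,
	stopper *stop.Stopper,
	storage closedts.Storage,
	clock closedts.LiveClockFn,
	closeFn closedts.CloseFn,
) *Provider {
	p := NewProvider(&Config{
		NodeID:   nodeID,
		Settings: st,
		Stopper:  stopper,
		Storage:  storage,
		Clock:    clock,
		Close:    closeFn,
	})
	// Start spawns the ct-closer worker on the Stopper; it runs until the
	// Stopper quiesces.
	p.Start()
	return p
}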

// drain marks the Provider as draining and then busy-waits, repeatedly waking
// up the subscribers, until all of them have unregistered.
func (p *Provider) drain() {
	p.mu.Lock()
	p.mu.draining = true
	p.mu.Unlock()
	for {
		p.mu.Broadcast()
		p.mu.Lock()
		done := true
		for _, sub := range p.mu.subscribers {
			done = done && sub == nil
		}
		p.mu.Unlock()

		if done {
			return
		}
	}
}

// runCloser is the closer worker. It periodically closes out a timestamp that
// trails the current time by the target duration and, via the local Notify
// subscription, publishes the resulting entry to the Storage and to all
// subscribers.
func (p *Provider) runCloser(ctx context.Context) {
	// The loop below signals the subscribers, so when it exits it needs to do
	// extra work to help the subscribers terminate.
	defer p.drain()

	if p.cfg.NodeID == 0 {
		// This Provider is likely misconfigured.
		panic("can't use NodeID zero")
	}
	ch := p.Notify(p.cfg.NodeID)
	defer close(ch)

	confCh := make(chan struct{}, 1)
	confChanged := func() {
		select {
		case confCh <- struct{}{}:
		default:
		}
	}
	closedts.TargetDuration.SetOnChange(&p.cfg.Settings.SV, confChanged)
	// Track whether we've ever been live to avoid logging warnings about not
	// being live during node startup.
	var everBeenLive bool
	var t timeutil.Timer
	defer t.Stop()
	for {
		closeFraction := closedts.CloseFraction.Get(&p.cfg.Settings.SV)
		targetDuration := float64(closedts.TargetDuration.Get(&p.cfg.Settings.SV))
		if targetDuration > 0 {
			t.Reset(time.Duration(closeFraction * targetDuration))
		} else {
			t.Stop() // disable closing when the target duration is non-positive
		}
		select {
		case <-p.cfg.Stopper.ShouldQuiesce():
			return
		case <-ctx.Done():
			return
		case <-t.C:
			t.Read = true
		case <-confCh:
			// Loop around to use the updated timer.
			continue
		}

		next, liveAtEpoch, err := p.cfg.Clock(p.cfg.NodeID)
		// Close out a timestamp that lags the current time by the target
		// duration.
		next.WallTime -= int64(targetDuration)
		if err != nil {
			if everBeenLive && p.everyClockLog.ShouldLog() {
				log.Warningf(ctx, "unable to move closed timestamp forward: %+v", err)
			}
			// Broadcast even if nothing new was queued, so that the subscribers
			// loop to check their client's context.
			p.mu.Broadcast()
		} else {
			everBeenLive = true
			// Close may fail if the data being closed does not correspond to the
			// current liveAtEpoch.
			closed, m, ok := p.cfg.Close(next, liveAtEpoch)
			if !ok {
				if log.V(1) {
					log.Infof(ctx, "failed to close %v due to liveness epoch mismatch at %v",
						next, liveAtEpoch)
				}
				continue
			}
			if log.V(1) {
				log.Infof(ctx, "closed ts=%s with %+v, next closed timestamp should be %s",
					closed, m, next)
			}
			entry := ctpb.Entry{
				Epoch:           liveAtEpoch,
				ClosedTimestamp: closed,
				MLAI:            m,
			}

			// Simulate a subscription to the local node, so that the new information
			// is added to the storage (and thus becomes available to future subscribers
			// as well, not only to existing ones). The other end of the chan will Broadcast().
			//
			// TODO(tschottdorf): the transport should ignore connection requests from
			// the node to itself. Those connections would pointlessly loop this around
			// once more.
			ch <- entry
		}
	}
}
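
// exampleCloseCadence illustrates the arithmetic of the loop above under
// assumed settings: with a target duration of 30s and a close fraction of
// 0.2, the closer wakes up every 6s, and each attempt tries to close out a
// timestamp 30s in the past. The constants are hypothetical and exist only
// for this sketch.
func exampleCloseCadence() (wakeEvery, lag time.Duration) {
	const targetDuration = 30 * time.Second
	const closeFraction = 0.2
	// Matches t.Reset(time.Duration(closeFraction * targetDuration)) above.
	wakeEvery = time.Duration(closeFraction * float64(targetDuration)) // 6s
	// Matches next.WallTime -= int64(targetDuration) above.
	lag = targetDuration // 30s
	return wakeEvery, lag
}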

// Notify implements closedts.Notifyee. It passes the incoming stream of Entries
// to the local Storage.
func (p *Provider) Notify(nodeID roachpb.NodeID) chan<- ctpb.Entry {
	ch := make(chan ctpb.Entry)

	p.cfg.Stopper.RunWorker(context.Background(), func(ctx context.Context) {
		handle := func(entry ctpb.Entry) {
			p.cfg.Storage.Add(nodeID, entry)
		}
		// Special-case data about the origin node, which folks can subscribe to.
		// This is easily generalized to also allow subscriptions for data that
		// originated on other nodes, but this doesn't seem necessary right now.
		if nodeID == p.cfg.NodeID {
			handle = func(entry ctpb.Entry) {
				// Add to the Storage first.
				p.cfg.Storage.Add(nodeID, entry)
				// Notify existing subscribers.
				p.mu.Lock()
				for _, sub := range p.mu.subscribers {
					if sub == nil {
						continue
					}
					sub.queue = append(sub.queue, entry)
				}
				p.mu.Unlock()
				// Wake up all clients.
				p.mu.Broadcast()
			}
		}
		for entry := range ch {
			handle(entry)
		}
	})

	return ch
}
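
// exampleNotify sketches, for illustration only, how a transport-side client
// might feed entries received from another node into the Provider: obtain
// the channel once, send each incoming Entry on it, and close the channel
// when the stream ends. The originNodeID and incoming values are assumed to
// come from the (hypothetical) caller.
func exampleNotify(p *Provider, originNodeID roachpb.NodeID, incoming []ctpb.Entry) {
	ch := p.Notify(originNodeID)
	// Closing the channel stops the worker goroutine started by Notify.
	defer close(ch)
	for _, entry := range incoming {
		ch <- entry
	}
}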

// Subscribe implements closedts.Producer. It produces a stream of Entries
// pertaining to the local Node.
//
// TODO(tschottdorf): consider not forcing the caller to launch the goroutine.
func (p *Provider) Subscribe(ctx context.Context, ch chan<- ctpb.Entry) {
	var i int
	sub := &subscriber{ch, nil}
	p.mu.Lock()
	for i = 0; i < len(p.mu.subscribers); i++ {
		if p.mu.subscribers[i] == nil {
			p.mu.subscribers[i] = sub
			break
		}
	}
	if i == len(p.mu.subscribers) {
		p.mu.subscribers = append(p.mu.subscribers, sub)
	}
	draining := p.mu.draining
	p.mu.Unlock()

	defer func() {
		p.mu.Lock()
		p.mu.subscribers[i] = nil
		p.mu.Unlock()
		close(ch)
	}()

	if draining {
		return
	}

	if log.V(1) {
		log.Infof(ctx, "new subscriber (slot %d) connected", i)
	}

	// The subscription is already active, so any storage snapshot from now on is
	// going to fully catch up the subscriber without a gap.
	{
		var entries []ctpb.Entry

		p.cfg.Storage.VisitAscending(p.cfg.NodeID, func(entry ctpb.Entry) (done bool) {
			// Don't block in this method.
			entries = append(entries, entry)
			return false // not done
		})

		for _, entry := range entries {
			select {
			case ch <- entry:
			case <-p.cfg.Stopper.ShouldQuiesce():
				return
			case <-ctx.Done():
				return
			}
		}
	}

	for {
		p.mu.RLock()
		var done bool
		for len(p.mu.subscribers[i].queue) == 0 {
			if ctx.Err() != nil || p.mu.draining {
				done = true
				break
			}
			p.mu.Wait()
		}
		var queue []ctpb.Entry
		// When only readers are around (as they are now), we can actually
		// mutate our slot because that's all the others do as well.
		queue, p.mu.subscribers[i].queue = p.mu.subscribers[i].queue, nil
		p.mu.RUnlock()

		if done {
			return
		}

		shouldLog := log.V(1)
		var n int
		minMLAI := ctpb.LAI(math.MaxInt64)
		var minRangeID, maxRangeID roachpb.RangeID
		var maxMLAI ctpb.LAI

		for _, entry := range queue {
			if shouldLog {
				n += len(entry.MLAI)
				for rangeID, mlai := range entry.MLAI {
					if mlai < minMLAI {
						minMLAI = mlai
						minRangeID = rangeID
					}
					if mlai > maxMLAI {
						maxMLAI = mlai
						maxRangeID = rangeID
					}
				}
			}

			select {
			case ch <- entry:
			case <-p.cfg.Stopper.ShouldQuiesce():
				return
			case <-ctx.Done():
				return
			}
		}
		if shouldLog {
			log.Infof(ctx, "sent %d closed timestamp entries to client %d (%d range updates total, min/max mlai: %d@r%d / %d@r%d)",
				len(queue), i, n, minMLAI, minRangeID, maxMLAI, maxRangeID)
		}
	}
}
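
// exampleSubscribe sketches, for illustration only, how a caller (such as the
// server-side transport, an assumption here) is expected to drive Subscribe:
// launch it in a goroutine and drain the channel, which the Provider closes
// when the subscription ends. The forward callback is hypothetical.
func exampleSubscribe(ctx context.Context, p *Provider, forward func(ctpb.Entry)) {
	ch := make(chan ctpb.Entry)
	go p.Subscribe(ctx, ch)
	for entry := range ch {
		forward(entry) // e.g. relay the entry to the remote subscriber
	}
}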

// MaxClosed implements closedts.Provider. It returns the largest closed
// timestamp stored for the given node that applies to the given range at the
// given epoch and lease applied index, or the zero timestamp if none is known.
func (p *Provider) MaxClosed(
	nodeID roachpb.NodeID, rangeID roachpb.RangeID, epoch ctpb.Epoch, lai ctpb.LAI,
) hlc.Timestamp {
	var maxTS hlc.Timestamp
	p.cfg.Storage.VisitDescending(nodeID, func(entry ctpb.Entry) (done bool) {
		if mlai, found := entry.MLAI[rangeID]; found {
			if entry.Epoch == epoch && mlai <= lai {
				maxTS = entry.ClosedTimestamp
				return true
			}
		}
		return false
	})

	return maxTS
}
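
// exampleCanServeFollowerRead sketches, for illustration only, how MaxClosed
// might be consulted by a follower deciding whether it can serve a read at
// timestamp ts: the read is safe once ts is at or below the largest closed
// timestamp known for the leaseholder's node, epoch, and the follower's
// applied lease index. All parameters are hypothetical inputs.
func exampleCanServeFollowerRead(
	p *Provider,
	leaseholderNodeID roachpb.NodeID,
	rangeID roachpb.RangeID,
	epoch ctpb.Epoch,
	appliedLAI ctpb.LAI,
	ts hlc.Timestamp,
) bool {
	maxClosed := p.MaxClosed(leaseholderNodeID, rangeID, epoch, appliedLAI)
	// ts <= maxClosed, expressed via hlc.Timestamp.Less.
	return !maxClosed.Less(ts)
}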