github.com/letsencrypt/trillian@v1.1.2-0.20180615153820-ae375a99d36a/util/election/runner.go (about)

     1  // Copyright 2017 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package election
    16  
    17  import (
    18  	"context"
    19  	"math/rand"
    20  	"time"
    21  
    22  	"github.com/golang/glog"
    23  	"github.com/google/trillian/util"
    24  )
    25  
// Minimum values for configuration intervals.
//
// fixupRunnerConfig raises any RunnerConfig interval that is smaller than the
// matching constant here up to that constant, so these are the effective lower
// bounds for PreElectionPause, MasterCheckInterval and MasterHoldInterval.
const (
	MinPreElectionPause    = 10 * time.Millisecond
	MinMasterCheckInterval = 50 * time.Millisecond
	MinMasterHoldInterval  = 10 * time.Second
)
    32  
// RunnerConfig describes the parameters for an election Runner.
//
// NewRunner passes the config through fixupRunnerConfig, which modifies it in
// place: intervals below their Min* constants are raised to those minimums, a
// ResignOdds below 1 becomes 1, and a nil TimeSource is replaced with
// util.SystemTimeSource{}.
type RunnerConfig struct {
	// PreElectionPause is the maximum interval to wait before starting a
	// mastership election for a particular log. Run picks the actual pause
	// at random below this value.
	PreElectionPause time.Duration
	// MasterCheckInterval is the interval between checks that we still
	// hold mastership for a log.
	MasterCheckInterval time.Duration
	// MasterHoldInterval is the minimum interval to hold mastership for.
	// ShouldResign never returns true before this much time has passed.
	MasterHoldInterval time.Duration
	// ResignOdds gives the chance of resigning mastership after each
	// check interval, as the N for 1-in-N. With a value of 1 the runner
	// resigns at the first check after MasterHoldInterval has elapsed.
	ResignOdds int

	// TimeSource supplies the current time used to record when mastership
	// was acquired and to measure how long it has been held.
	TimeSource util.TimeSource
}
    49  
    50  // fixupRunnerConfig ensures operation parameters have required minimum values.
    51  func fixupRunnerConfig(cfg *RunnerConfig) {
    52  	if cfg.PreElectionPause < MinPreElectionPause {
    53  		cfg.PreElectionPause = MinPreElectionPause
    54  	}
    55  	if cfg.MasterCheckInterval < MinMasterCheckInterval {
    56  		cfg.MasterCheckInterval = MinMasterCheckInterval
    57  	}
    58  	if cfg.MasterHoldInterval < MinMasterHoldInterval {
    59  		cfg.MasterHoldInterval = MinMasterHoldInterval
    60  	}
    61  	if cfg.ResignOdds < 1 {
    62  		cfg.ResignOdds = 1
    63  	}
    64  	if cfg.TimeSource == nil {
    65  		cfg.TimeSource = util.SystemTimeSource{}
    66  	}
    67  }
    68  
// Runner controls a continuous election process.
//
// A Runner is built by NewRunner and driven by Run. Its fields are:
//   - Cancel: caller-supplied cancel function, kept only for the caller's
//     convenience; nothing in this file invokes it.
//   - id: identifier used in log messages, tracker updates and resignations.
//   - cfg: timing and resignation parameters (see RunnerConfig).
//   - tracker: told via Set whenever Run gains or gives up mastership.
//   - election: the underlying election that Run and Resignation.Execute drive.
type Runner struct {
	// Allow the user to store a Cancel function with the runner for convenience.
	Cancel   context.CancelFunc
	id       int64
	cfg      *RunnerConfig
	tracker  *MasterTracker
	election MasterElection
}
    78  
    79  // NewRunner builds a new election Runner instance with the given configuration.  On calling
    80  // Run(), the provided election will be continuously monitored and mastership changes will
    81  // be notified to the provided MasterTracker instance.
    82  func NewRunner(id int64, cfg *RunnerConfig, tracker *MasterTracker, cancel context.CancelFunc, el MasterElection) *Runner {
    83  	fixupRunnerConfig(cfg)
    84  	return &Runner{
    85  		Cancel:   cancel,
    86  		id:       id,
    87  		cfg:      cfg,
    88  		tracker:  tracker,
    89  		election: el,
    90  	}
    91  }
    92  
    93  // Run performs a continuous election process. It runs continuously until the
    94  // context is canceled or an internal error is encountered.
    95  func (er *Runner) Run(ctx context.Context, pending chan<- Resignation) {
    96  	// Pause for a random interval so that if multiple instances start at the same
    97  	// time there is less of a thundering herd.
    98  	pause := rand.Int63n(er.cfg.PreElectionPause.Nanoseconds())
    99  	if err := util.SleepContext(ctx, time.Duration(pause)); err != nil {
   100  		return
   101  	}
   102  
   103  	glog.V(1).Infof("%d: start election-monitoring loop ", er.id)
   104  	if err := er.election.Start(ctx); err != nil {
   105  		glog.Errorf("%d: election.Start() failed: %v", er.id, err)
   106  		return
   107  	}
   108  	defer func(ctx context.Context, er *Runner) {
   109  		glog.Infof("%d: shutdown election-monitoring loop", er.id)
   110  		er.election.Close(ctx)
   111  	}(ctx, er)
   112  
   113  	for {
   114  		glog.V(1).Infof("%d: When I left you, I was but the learner", er.id)
   115  		if err := er.election.WaitForMastership(ctx); err != nil {
   116  			glog.Errorf("%d: er.election.WaitForMastership() failed: %v", er.id, err)
   117  			return
   118  		}
   119  		glog.V(1).Infof("%d: Now, I am the master", er.id)
   120  		er.tracker.Set(er.id, true)
   121  		masterSince := er.cfg.TimeSource.Now()
   122  
   123  		// While-master loop
   124  		for {
   125  			if err := util.SleepContext(ctx, er.cfg.MasterCheckInterval); err != nil {
   126  				glog.Infof("%d: termination requested", er.id)
   127  				return
   128  			}
   129  			master, err := er.election.IsMaster(ctx)
   130  			if err != nil {
   131  				glog.Errorf("%d: failed to check mastership status", er.id)
   132  				break
   133  			}
   134  			if !master {
   135  				glog.Errorf("%d: no longer the master!", er.id)
   136  				er.tracker.Set(er.id, false)
   137  				break
   138  			}
   139  			if er.ShouldResign(masterSince) {
   140  				glog.Infof("%d: queue up resignation of mastership", er.id)
   141  				er.tracker.Set(er.id, false)
   142  
   143  				done := make(chan bool)
   144  				r := Resignation{ID: er.id, er: er, done: done}
   145  				pending <- r
   146  				<-done // block until acted on
   147  				break  // no longer master
   148  			}
   149  		}
   150  	}
   151  }
   152  
   153  // ShouldResign randomly decides whether this runner should resign mastership.
   154  func (er *Runner) ShouldResign(masterSince time.Time) bool {
   155  	now := er.cfg.TimeSource.Now()
   156  	duration := now.Sub(masterSince)
   157  	if duration < er.cfg.MasterHoldInterval {
   158  		// Always hold onto mastership for a minimum interval to prevent churn.
   159  		return false
   160  	}
   161  	// Roll the bones.
   162  	odds := er.cfg.ResignOdds
   163  	if odds <= 0 {
   164  		return true
   165  	}
   166  	return rand.Intn(er.cfg.ResignOdds) == 0
   167  }
   168  
// Resignation indicates that a master should explicitly resign mastership, by invoking
// the Execute() method at a point where no master-related activity is ongoing.
//
// Run creates a Resignation when ShouldResign returns true, sends it on its
// pending channel and then blocks until Execute has signaled completion.
// ID is the id of the runner that wants to resign, er is that runner, and
// done is the channel Execute sends on after attempting the resignation.
type Resignation struct {
	ID   int64
	er   *Runner
	done chan<- bool
}
   176  
   177  // Execute performs the pending deliberate resignation for an election runner.
   178  func (r *Resignation) Execute(ctx context.Context) {
   179  	glog.Infof("%d: deliberately resigning mastership", r.er.id)
   180  	if err := r.er.election.Resign(ctx); err != nil {
   181  		glog.Errorf("%d: failed to resign mastership: %v", r.er.id, err)
   182  	}
   183  	if err := r.er.election.Start(ctx); err != nil {
   184  		glog.Errorf("%d: failed to restart election: %v", r.er.id, err)
   185  	}
   186  	r.done <- true
   187  }