github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/apiserver/facades/agent/presence/pinger.go (about)

     1  // Copyright 2016 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package presence
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/clock"
    11  	"github.com/juju/errors"
    12  	"github.com/juju/loggo"
    13  	"gopkg.in/juju/names.v2"
    14  	"gopkg.in/juju/worker.v1"
    15  	"gopkg.in/juju/worker.v1/catacomb"
    16  )
    17  
    18  // Pinger exposes some methods implemented by state/presence.Pinger; they are the only ones the Worker in this file calls.
    19  type Pinger interface {
    20  	// Stop kills the pinger, then waits for it to exit. The Worker ignores its result and relies on Wait to report errors.
    21  	Stop() error
    22  	// Wait waits for the pinger to stop. The Worker logs any error it returns.
    23  	Wait() error
    24  }
    25  
    26  // Config contains the information necessary to drive a Worker; New rejects any Config for which Validate returns an error.
    27  type Config struct {
    28  
    29  	// Identity records the entity whose connectedness is being
    30  	// affirmed by this worker. It's used to create a logger that
    31  	// can let us see which agent's pinger is actually failing.
    32  	Identity names.Tag
    33  
    34  	// Start starts a new, running Pinger or returns an error.
    35  	Start func() (Pinger, error)
    36  
    37  	// Clock supplies the After channel that loop waits on before each Start attempt.
    38  	Clock clock.Clock
    39  
    40  	// RetryDelay is how long the worker waits before every Start
    41  	// attempt after the first; Validate requires it to be positive.
    42  	// NOTE(review): this comment used to say the delay applied only
    43  	// when a Start failed, to minimise the chances of erroneously
    44  	// reporting agent-lost, but loop() waits after any Pinger exit.
    45  	RetryDelay time.Duration
    46  }
    47  
    48  // Validate returns an error if Config cannot be expected to drive a
    49  // Worker.
    50  func (config Config) Validate() error {
    51  	if config.Identity == nil {
    52  		return errors.NotValidf("nil Identity")
    53  	}
    54  	if config.Start == nil {
    55  		return errors.NotValidf("nil Start")
    56  	}
    57  	if config.Clock == nil {
    58  		return errors.NotValidf("nil Clock")
    59  	}
    60  	if config.RetryDelay <= 0 {
    61  		return errors.NotValidf("non-positive RetryDelay")
    62  	}
    63  	return nil
    64  }
    65  
    66  // New returns a Worker backed by Config. The caller is responsible for
    67  // Kill()ing the Worker and handling any errors returned from Wait();
    68  // but as it happens it's designed to be an apiserver/common.Resource,
    69  // and never to exit unless Kill()ed, so in practice Stop(), which will
    70  // call Kill() and Wait() internally, is Good Enough.
    71  func New(config Config) (*Worker, error) {
    72  	if err := config.Validate(); err != nil {
    73  		return nil, errors.Trace(err)
    74  	}
    75  	name := fmt.Sprintf("juju.apiserver.presence.%s", config.Identity)
    76  	w := &Worker{
    77  		config:  config,
    78  		logger:  loggo.GetLogger(name),
    79  		running: make(chan struct{}),
    80  	}
    81  	err := catacomb.Invoke(catacomb.Plan{
    82  		Site: &w.catacomb,
    83  		Work: w.loop,
    84  	})
    85  	if err != nil {
    86  		return nil, errors.Trace(err)
    87  	}
    88  
    89  	// To support unhappy assumptions in apiserver/server_test.go,
    90  	// we block New until at least one attempt to start a Pinger
    91  	// has been made. This preserves the apparent behaviour of an
    92  	// unwrapped Pinger under normal conditions.
    93  	select {
    94  	case <-w.catacomb.Dying():
    95  		if err := w.Wait(); err != nil {
    96  			return nil, errors.Trace(err)
    97  		}
    98  		return nil, errors.New("worker stopped abnormally without reporting an error")
    99  	case <-w.running:
   100  		return w, nil
   101  	}
   102  }
   103  
   104  // Worker creates a Pinger as configured, and recreates it as it fails
   105  // until the Worker is stopped; at which point it shuts down any extant
   106  // Pinger before returning. Instances are built by New.
   107  type Worker struct {
   108  	catacomb catacomb.Catacomb // runs loop (see New) and backs Kill and Wait
   109  	config   Config            // checked with Validate by New before use
   110  	logger   loggo.Logger      // named "juju.apiserver.presence.<Identity>" by New
   111  	running  chan struct{}     // closed by reportRunning after the first Start attempt
   112  }
   113  
   114  // Kill is part of the worker.Worker interface. It kills the Worker's catacomb, passing a nil error.
   115  func (w *Worker) Kill() {
   116  	w.catacomb.Kill(nil)
   117  }
   118  
   119  // Wait is part of the worker.Worker interface. It returns whatever the Worker's catacomb returns from its own Wait.
   120  func (w *Worker) Wait() error {
   121  	return w.catacomb.Wait()
   122  }
   123  
   124  // Stop is part of the apiserver/common.Resource interface. It delegates to worker.Stop and returns that call's result.
   125  //
   126  // It's not a very good idea -- see comments on lp:1572237 -- but we're
   127  // only addressing the proximate cause of the issue here.
   128  func (w *Worker) Stop() error {
   129  	return worker.Stop(w)
   130  }
   131  
   132  // loop runs Pingers until w is stopped.
   133  func (w *Worker) loop() error {
   134  	var delay time.Duration
   135  	for {
   136  		select {
   137  		case <-w.catacomb.Dying():
   138  			return w.catacomb.ErrDying()
   139  		case <-w.config.Clock.After(delay):
   140  			maybePinger := w.maybeStartPinger()
   141  			w.reportRunning()
   142  			w.waitPinger(maybePinger)
   143  		}
   144  		delay = w.config.RetryDelay
   145  	}
   146  }
   147  
   148  // maybeStartPinger starts and returns a new Pinger; or, if it
   149  // encounters an error, logs it and returns nil.
   150  func (w *Worker) maybeStartPinger() Pinger {
   151  	w.logger.Tracef("starting pinger...")
   152  	pinger, err := w.config.Start()
   153  	if err != nil {
   154  		w.logger.Errorf("cannot start pinger: %v", err)
   155  		return nil
   156  	}
   157  	w.logger.Tracef("pinger started")
   158  	return pinger
   159  }
   160  
   161  // reportRunning is a foul hack designed to delay apparent worker start
   162  // until at least one ping has been delivered (or attempted). It only
   163  // exists to make various distant tests, which should ideally not be
   164  // depending on these implementation details, reliable.
   165  func (w *Worker) reportRunning() {
   166  	select {
   167  	case <-w.running:
   168  	default:
   169  		close(w.running)
   170  	}
   171  }
   172  
   173  // waitPinger waits for the death of either the pinger or the worker;
   174  // stops the pinger if necessary; and returns once the pinger is
   175  // finished. If pinger is nil, it returns immediately.
   176  func (w *Worker) waitPinger(pinger Pinger) {
   177  	if pinger == nil {
   178  		return
   179  	}
   180  
   181  	// Set up a channel that will last as long as this method call.
   182  	done := make(chan struct{})
   183  	defer close(done)
   184  
   185  	// Start a goroutine to stop the Pinger if the worker is killed.
   186  	// If the enclosing method completes, we know that the Pinger
   187  	// has already stopped, and we can return immediately.
   188  	//
   189  	// Note that we ignore errors out of Stop(), depending on the
   190  	// Pinger to manage errors properly and report them via Wait()
   191  	// below.
   192  	go func() {
   193  		select {
   194  		case <-done:
   195  		case <-w.catacomb.Dying():
   196  			w.logger.Tracef("stopping pinger")
   197  			pinger.Stop()
   198  		}
   199  	}()
   200  
   201  	// Now, just wait for the Pinger to stop. It might be caused by
   202  	// the Worker's death, or it might have failed on its own; in
   203  	// any case, errors are worth recording, but we don't need to
   204  	// respond in any way because that's loop()'s responsibility.
   205  	w.logger.Tracef("waiting for pinger...")
   206  	if err := pinger.Wait(); err != nil {
   207  		w.logger.Errorf("pinger failed: %v", err)
   208  	}
   209  }