github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/apiserver/presence/pinger.go (about)

     1  // Copyright 2016 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package presence
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/juju/worker"
    12  	"github.com/juju/juju/worker/catacomb"
    13  	"github.com/juju/loggo"
    14  	"github.com/juju/names"
    15  	"github.com/juju/utils/clock"
    16  )
    17  
// Pinger exposes some methods implemented by state/presence.Pinger.
//
// It is the only view of a running pinger this package relies on:
// Worker calls Stop once it is dying, and Wait to find out when the
// pinger has finished and with what error.
type Pinger interface {
	// Stop kills the pinger, then waits for it to exit.
	Stop() error
	// Wait waits for the pinger to stop.
	Wait() error
}
    25  
// Config contains the information necessary to drive a Worker.
// Every field is required; see Validate for the exact checks.
type Config struct {

	// Identity records the entity whose connectedness is being
	// affirmed by this worker. It's used to create a logger that
	// can let us see which agent's pinger is actually failing.
	Identity names.Tag

	// Start starts a new, running Pinger or returns an error.
	// It is called again every time a previous Pinger stops or a
	// previous Start attempt fails, for as long as the Worker lives.
	Start func() (Pinger, error)

	// Clock is used to throttle failed Start attempts.
	Clock clock.Clock

	// RetryDelay controls by how much we throttle failed Start
	// attempts. Note that we only apply the delay when a Start
	// fails; if a Pinger ran, however briefly, we'll try to restart
	// it immediately, so as to minimise the chances of erroneously
	// causing agent-lost to be reported. Must be strictly positive.
	RetryDelay time.Duration
}
    47  
    48  // Validate returns an error if Config cannot be expected to drive a
    49  // Worker.
    50  func (config Config) Validate() error {
    51  	if config.Identity == nil {
    52  		return errors.NotValidf("nil Identity")
    53  	}
    54  	if config.Start == nil {
    55  		return errors.NotValidf("nil Start")
    56  	}
    57  	if config.Clock == nil {
    58  		return errors.NotValidf("nil Clock")
    59  	}
    60  	if config.RetryDelay <= 0 {
    61  		return errors.NotValidf("non-positive RetryDelay")
    62  	}
    63  	return nil
    64  }
    65  
    66  // New returns a Worker backed by Config. The caller is responsible for
    67  // Kill()ing the Worker and handling any errors returned from Wait();
    68  // but as it happens it's designed to be an apiserver/common.Resource,
    69  // and never to exit unless Kill()ed, so in practice Stop(), which will
    70  // call Kill() and Wait() internally, is Good Enough.
    71  func New(config Config) (*Worker, error) {
    72  	if err := config.Validate(); err != nil {
    73  		return nil, errors.Trace(err)
    74  	}
    75  	name := fmt.Sprintf("juju.apiserver.presence.%s", config.Identity)
    76  	w := &Worker{
    77  		config: config,
    78  		logger: loggo.GetLogger(name),
    79  	}
    80  	ready := make(chan struct{})
    81  	err := catacomb.Invoke(catacomb.Plan{
    82  		Site: &w.catacomb,
    83  		Work: func() error {
    84  			// Run once to prime presence before diving into the loop.
    85  			pinger := w.startPinger()
    86  			if ready != nil {
    87  				close(ready)
    88  				ready = nil
    89  			}
    90  			if pinger != nil {
    91  				w.waitOnPinger(pinger)
    92  			}
    93  			return w.loop()
    94  		},
    95  	})
    96  	if err != nil {
    97  		return nil, errors.Trace(err)
    98  	}
    99  	<-ready
   100  	return w, nil
   101  }
   102  
// Worker creates a Pinger as configured, and recreates it as it fails
// until the Worker is stopped; at which point it shuts down any extant
// Pinger before returning.
type Worker struct {
	// catacomb tracks the Worker's lifetime; Kill and Wait delegate
	// to it, and its Dying channel tells the Worker when to stop.
	catacomb catacomb.Catacomb
	// config holds the validated Config the Worker was created with.
	config Config
	// logger is named after config.Identity, so that log output can
	// be attributed to a particular agent's pinger.
	logger loggo.Logger
}
   111  
// Kill is part of the worker.Worker interface.
//
// It asks the Worker to stop by killing its catacomb with a nil
// error, and does not itself wait for the Worker to finish; use
// Wait for that.
func (w *Worker) Kill() {
	w.catacomb.Kill(nil)
}
   116  
// Wait is part of the worker.Worker interface.
//
// It returns whatever the Worker's catacomb reports from its own
// Wait.
func (w *Worker) Wait() error {
	return w.catacomb.Wait()
}
   121  
// Stop is part of the apiserver/common.Resource interface.
//
// It's not a very good idea -- see comments on lp:1572237 -- but we're
// only addressing the proximate cause of the issue here.
//
// It delegates to worker.Stop, passing the Worker itself.
// NOTE(review): worker.Stop presumably Kill()s the Worker and returns
// the result of Wait() -- confirm against the worker package.
func (w *Worker) Stop() error {
	return worker.Stop(w)
}
   129  
   130  // loop runs Pingers until w is stopped.
   131  func (w *Worker) loop() error {
   132  	var delay time.Duration
   133  	clock := w.config.Clock
   134  	for {
   135  		select {
   136  		case <-w.catacomb.Dying():
   137  			return w.catacomb.ErrDying()
   138  		case <-clock.After(delay):
   139  			delay = 0
   140  			pinger := w.startPinger()
   141  			if pinger == nil {
   142  				// Failed to start.
   143  				delay = w.config.RetryDelay
   144  				continue
   145  			}
   146  			w.waitOnPinger(pinger)
   147  		}
   148  	}
   149  }
   150  
   151  // startPinger starts a single Pinger. It returns nil if the pinger
   152  // could not be started.
   153  func (w *Worker) startPinger() Pinger {
   154  	w.logger.Debugf("starting pinger...")
   155  	pinger, err := w.config.Start()
   156  	if err != nil {
   157  		w.logger.Errorf("pinger failed to start: %v", err)
   158  		return nil
   159  	}
   160  	w.logger.Debugf("pinger started")
   161  	return pinger
   162  }
   163  
   164  // waitOnPinger waits indefinitely for the given Pinger to complete,
   165  // stopping it only when the Worker is Kill()ed.
   166  func (w *Worker) waitOnPinger(pinger Pinger) {
   167  	// Start a goroutine that waits for the Worker to be stopped,
   168  	// and then stops the Pinger.  Note also that we ignore errors
   169  	// out of Stop(): they will be caught by the Pinger anyway, and
   170  	// we'll see them come out of Wait() below.
   171  	go func() {
   172  		<-w.catacomb.Dying()
   173  		pinger.Stop()
   174  	}()
   175  
   176  	// Now, just wait for the Pinger to stop. It might be caused by
   177  	// the Worker's death, or it might have failed on its own; in
   178  	// any case, errors are worth recording, but we don't need to
   179  	// respond in any way because that's loop()'s responsibility.
   180  	if err := pinger.Wait(); err != nil {
   181  		w.logger.Errorf("pinger failed: %v", err)
   182  	}
   183  }