github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/apiserver/presence/pinger.go (about) 1 // Copyright 2016 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package presence 5 6 import ( 7 "fmt" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/juju/worker" 12 "github.com/juju/juju/worker/catacomb" 13 "github.com/juju/loggo" 14 "github.com/juju/names" 15 "github.com/juju/utils/clock" 16 ) 17 18 // Pinger exposes some methods implemented by state/presence.Pinger. 19 type Pinger interface { 20 // Stop kills the pinger, then waits for it to exit. 21 Stop() error 22 // Wait waits for the pinger to stop. 23 Wait() error 24 } 25 26 // Config contains the information necessary to drive a Worker. 27 type Config struct { 28 29 // Identity records the entity whose connectedness is being 30 // affirmed by this worker. It's used to create a logger that 31 // can let us see which agent's pinger is actually failing. 32 Identity names.Tag 33 34 // Start starts a new, running Pinger or returns an error. 35 Start func() (Pinger, error) 36 37 // Clock is used to throttle failed Start attempts. 38 Clock clock.Clock 39 40 // RetryDelay controls by how much we throttle failed Start 41 // attempts. Note that we only apply the delay when a Start 42 // fails; if a Pinger ran, however briefly, we'll try to restart 43 // it immediately, so as to minimise the changes of erroneously 44 // causing agent-lost to be reported. 45 RetryDelay time.Duration 46 } 47 48 // Validate returns an error if Config cannot be expected to drive a 49 // Worker. 50 func (config Config) Validate() error { 51 if config.Identity == nil { 52 return errors.NotValidf("nil Identity") 53 } 54 if config.Start == nil { 55 return errors.NotValidf("nil Start") 56 } 57 if config.Clock == nil { 58 return errors.NotValidf("nil Clock") 59 } 60 if config.RetryDelay <= 0 { 61 return errors.NotValidf("non-positive RetryDelay") 62 } 63 return nil 64 } 65 66 // New returns a Worker backed by Config. The caller is responsible for 67 // Kill()ing the Worker and handling any errors returned from Wait(); 68 // but as it happens it's designed to be an apiserver/common.Resource, 69 // and never to exit unless Kill()ed, so in practice Stop(), which will 70 // call Kill() and Wait() internally, is Good Enough. 71 func New(config Config) (*Worker, error) { 72 if err := config.Validate(); err != nil { 73 return nil, errors.Trace(err) 74 } 75 name := fmt.Sprintf("juju.apiserver.presence.%s", config.Identity) 76 w := &Worker{ 77 config: config, 78 logger: loggo.GetLogger(name), 79 } 80 ready := make(chan struct{}) 81 err := catacomb.Invoke(catacomb.Plan{ 82 Site: &w.catacomb, 83 Work: func() error { 84 // Run once to prime presence before diving into the loop. 85 pinger := w.startPinger() 86 if ready != nil { 87 close(ready) 88 ready = nil 89 } 90 if pinger != nil { 91 w.waitOnPinger(pinger) 92 } 93 return w.loop() 94 }, 95 }) 96 if err != nil { 97 return nil, errors.Trace(err) 98 } 99 <-ready 100 return w, nil 101 } 102 103 // Worker creates a Pinger as configured, and recreates it as it fails 104 // until the Worker is stopped; at which point it shuts down any extant 105 // Pinger before returning. 106 type Worker struct { 107 catacomb catacomb.Catacomb 108 config Config 109 logger loggo.Logger 110 } 111 112 // Kill is part of the worker.Worker interface. 113 func (w *Worker) Kill() { 114 w.catacomb.Kill(nil) 115 } 116 117 // Wait is part of the worker.Worker interface. 118 func (w *Worker) Wait() error { 119 return w.catacomb.Wait() 120 } 121 122 // Stop is part of the apiserver/common.Resource interface. 123 // 124 // It's not a very good idea -- see comments on lp:1572237 -- but we're 125 // only addressing the proximate cause of the issue here. 126 func (w *Worker) Stop() error { 127 return worker.Stop(w) 128 } 129 130 // loop runs Pingers until w is stopped. 131 func (w *Worker) loop() error { 132 var delay time.Duration 133 clock := w.config.Clock 134 for { 135 select { 136 case <-w.catacomb.Dying(): 137 return w.catacomb.ErrDying() 138 case <-clock.After(delay): 139 delay = 0 140 pinger := w.startPinger() 141 if pinger == nil { 142 // Failed to start. 143 delay = w.config.RetryDelay 144 continue 145 } 146 w.waitOnPinger(pinger) 147 } 148 } 149 } 150 151 // startPinger starts a single Pinger. It returns nil if the pinger 152 // could not be started. 153 func (w *Worker) startPinger() Pinger { 154 w.logger.Debugf("starting pinger...") 155 pinger, err := w.config.Start() 156 if err != nil { 157 w.logger.Errorf("pinger failed to start: %v", err) 158 return nil 159 } 160 w.logger.Debugf("pinger started") 161 return pinger 162 } 163 164 // waitOnPinger waits indefinitely for the given Pinger to complete, 165 // stopping it only when the Worker is Kill()ed. 166 func (w *Worker) waitOnPinger(pinger Pinger) { 167 // Start a goroutine that waits for the Worker to be stopped, 168 // and then stops the Pinger. Note also that we ignore errors 169 // out of Stop(): they will be caught by the Pinger anyway, and 170 // we'll see them come out of Wait() below. 171 go func() { 172 <-w.catacomb.Dying() 173 pinger.Stop() 174 }() 175 176 // Now, just wait for the Pinger to stop. It might be caused by 177 // the Worker's death, or it might have failed on its own; in 178 // any case, errors are worth recording, but we don't need to 179 // respond in any way because that's loop()'s responsibility. 180 if err := pinger.Wait(); err != nil { 181 w.logger.Errorf("pinger failed: %v", err) 182 } 183 }