github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/worker/catacomb/catacomb.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package catacomb 5 6 import ( 7 "fmt" 8 "sync" 9 "sync/atomic" 10 11 "github.com/juju/errors" 12 "gopkg.in/tomb.v1" 13 14 "github.com/juju/juju/worker" 15 ) 16 17 // Catacomb is a variant of tomb.Tomb with its own internal goroutine, designed 18 // for coordinating the lifetimes of private workers needed by a single parent. 19 // 20 // As a client, you should only ever create zero values; these should be used 21 // with Invoke to manage a parent task. No Catacomb methods are meaningful 22 // until the catacomb has been started with a successful Invoke. 23 // 24 // See the package documentation for more detailed discussion and usage notes. 25 type Catacomb struct { 26 tomb tomb.Tomb 27 wg sync.WaitGroup 28 adds chan worker.Worker 29 dirty int32 30 } 31 32 // Plan defines the strategy for an Invoke. 33 type Plan struct { 34 35 // Site must point to an unused Catacomb. 36 Site *Catacomb 37 38 // Work will be run on a new goroutine, and tracked by Site. 39 Work func() error 40 41 // Init contains additional workers for which Site must be responsible. 42 Init []worker.Worker 43 } 44 45 // Validate returns an error if the plan cannot be used. It doesn't check for 46 // reused catacombs: plan validity is necessary but not sufficient to determine 47 // that an Invoke will succeed. 48 func (plan Plan) Validate() error { 49 if plan.Site == nil { 50 return errors.NotValidf("nil Site") 51 } 52 if plan.Work == nil { 53 return errors.NotValidf("nil Work") 54 } 55 for i, w := range plan.Init { 56 if w == nil { 57 return errors.NotValidf("nil Init item %d", i) 58 } 59 } 60 return nil 61 } 62 63 // Invoke uses the plan's catacomb to run the work func. It will return an 64 // error if the plan is not valid, or if the catacomb has already been used. 
// If Invoke returns no error, the catacomb is now controlling the work func,
// and its exported methods can be called safely.
//
// Invoke takes responsibility for all workers in plan.Init, *whether or not
// it succeeds*.
func Invoke(plan Plan) (err error) {

	// On any error return, stop the Init workers we were handed. The named
	// result err is what this closure inspects; the `return` statements below
	// assign to it even where a local err shadows the name.
	defer func() {
		if err != nil {
			stopWorkers(plan.Init)
		}
	}()

	if err := plan.Validate(); err != nil {
		return errors.Trace(err)
	}
	catacomb := plan.Site
	// Atomically claim the catacomb: only the first Invoke sees dirty == 0.
	if !atomic.CompareAndSwapInt32(&catacomb.dirty, 0, 1) {
		return errors.Errorf("catacomb %p has already been used", catacomb)
	}
	// Unbuffered: a send from Add completes only once the listener goroutine
	// below has received the worker.
	catacomb.adds = make(chan worker.Worker)

	// Add the Init workers right away, so the client can't induce data races
	// by modifying the slice post-return.
	for _, w := range plan.Init {
		catacomb.add(w)
	}

	// This goroutine listens for added workers until the catacomb is Killed.
	// We ensure the wg can't complete until we know no new workers will be
	// added.
	catacomb.wg.Add(1)
	go func() {
		defer catacomb.wg.Done()
		for {
			select {
			case <-catacomb.tomb.Dying():
				return
			case w := <-catacomb.adds:
				catacomb.add(w)
			}
		}
	}()

	// This goroutine runs the work func and stops the catacomb with its error;
	// and waits for the listen goroutine and all added workers to complete
	// before marking the catacomb's tomb Dead.
	go func() {
		// Deferred calls run last-in-first-out: wg.Wait() runs first, and
		// only then is the tomb marked Done.
		defer catacomb.tomb.Done()
		defer catacomb.wg.Wait()
		// runSafely converts a panic in plan.Work into an error.
		catacomb.Kill(runSafely(plan.Work))
	}()
	return nil
}

// stopWorkers stops all non-nil workers in the supplied slice, and swallows
// all errors. This is consistent, for now, because Catacomb swallows all
// errors but the first; as we come to rank or log errors, this must change
// to accommodate better practices.
124 func stopWorkers(workers []worker.Worker) { 125 for _, w := range workers { 126 if w != nil { 127 worker.Stop(w) 128 } 129 } 130 } 131 132 // Add causes the supplied worker's lifetime to be bound to the catacomb's, 133 // relieving the client of responsibility for Kill()ing it and Wait()ing for an 134 // error, *whether or not this method succeeds*. If the method returns an error, 135 // it always indicates that the catacomb is shutting down; the value will either 136 // be the error from the (now-stopped) worker, or catacomb.ErrDying(). 137 // 138 // If the worker completes without error, the catacomb will continue unaffected; 139 // otherwise the catacomb's tomb will be killed with the returned error. This 140 // allows clients to freely Kill() workers that have been Add()ed; any errors 141 // encountered will still kill the catacomb, so the workers stay under control 142 // until the last moment, and so can be managed pretty casually once they've 143 // been added. 144 // 145 // Don't try to add a worker to its own catacomb; that'll deadlock the shutdown 146 // procedure. I don't think there's much we can do about that. 147 func (catacomb *Catacomb) Add(w worker.Worker) error { 148 select { 149 case <-catacomb.tomb.Dying(): 150 if err := worker.Stop(w); err != nil { 151 return errors.Trace(err) 152 } 153 return catacomb.ErrDying() 154 case catacomb.adds <- w: 155 // Note that we don't need to wait for confirmation here. This depends 156 // on the catacomb.wg.Add() for the listen loop, which ensures the wg 157 // won't complete until no more adds can be received. 158 return nil 159 } 160 } 161 162 // add starts two goroutines that (1) kill the catacomb's tomb with any 163 // error encountered by the worker; and (2) kill the worker when the 164 // catacomb starts dying. 
165 func (catacomb *Catacomb) add(w worker.Worker) { 166 // We must wait for _both_ goroutines to exit in 167 // arbitrary order depending on the order of the worker 168 // and the catacomb shutting down. 169 catacomb.wg.Add(2) 170 go func() { 171 defer catacomb.wg.Done() 172 if err := w.Wait(); err != nil { 173 catacomb.Kill(err) 174 } 175 }() 176 go func() { 177 defer catacomb.wg.Done() 178 <-catacomb.tomb.Dying() 179 worker.Stop(w) 180 }() 181 } 182 183 // Dying returns a channel that will be closed when Kill is called. 184 func (catacomb *Catacomb) Dying() <-chan struct{} { 185 return catacomb.tomb.Dying() 186 } 187 188 // Dead returns a channel that will be closed when Invoke has completed (and 189 // thus when subsequent calls to Wait() are known not to block). 190 func (catacomb *Catacomb) Dead() <-chan struct{} { 191 return catacomb.tomb.Dead() 192 } 193 194 // Wait blocks until Invoke completes, and returns the first non-nil and 195 // non-tomb.ErrDying error passed to Kill before Invoke finished. 196 func (catacomb *Catacomb) Wait() error { 197 return catacomb.tomb.Wait() 198 } 199 200 // Kill kills the Catacomb's internal tomb with the supplied error, or one 201 // derived from it. 202 // * if it's caused by this catacomb's ErrDying, it passes on tomb.ErrDying. 203 // * if it's tomb.ErrDying, or caused by another catacomb's ErrDying, it passes 204 // on a new error complaining about the misuse. 205 // * all other errors are passed on unmodified. 206 // It's always safe to call Kill, but errors passed to Kill after the catacomb 207 // is dead will be ignored. 
208 func (catacomb *Catacomb) Kill(err error) { 209 if err == tomb.ErrDying { 210 err = errors.New("bad catacomb Kill: tomb.ErrDying") 211 } 212 cause := errors.Cause(err) 213 if match, ok := cause.(dyingError); ok { 214 if catacomb != match.catacomb { 215 err = errors.Errorf("bad catacomb Kill: other catacomb's ErrDying") 216 } else { 217 err = tomb.ErrDying 218 } 219 } 220 221 // TODO(fwereade) it's pretty clear that this ought to be a Kill(nil), and 222 // the catacomb should be responsible for ranking errors, just like the 223 // dependency.Engine does, rather than determining priority by scheduling 224 // alone. 225 catacomb.tomb.Kill(err) 226 } 227 228 // ErrDying returns an error that can be used to Kill *this* catacomb without 229 // overwriting nil errors. It should only be used when the catacomb is already 230 // known to be dying; calling this method at any other time will return a 231 // different error, indicating client misuse. 232 func (catacomb *Catacomb) ErrDying() error { 233 select { 234 case <-catacomb.tomb.Dying(): 235 return dyingError{catacomb} 236 default: 237 return errors.New("bad catacomb ErrDying: still alive") 238 } 239 } 240 241 // dyingError holds a reference to the catacomb that created it. 242 type dyingError struct { 243 catacomb *Catacomb 244 } 245 246 // Error is part of the error interface. 247 func (err dyingError) Error() string { 248 return fmt.Sprintf("catacomb %p is dying", err.catacomb) 249 } 250 251 // runSafely will ensure that the function is run, and any error is returned. 252 // If there is a panic, then that will be returned as an error. 253 func runSafely(f func() error) (err error) { 254 defer func() { 255 if panicResult := recover(); panicResult != nil { 256 err = errors.Errorf("panic resulted in: %v", panicResult) 257 } 258 }() 259 return f() 260 }