github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/worker/catacomb/catacomb.go

// Copyright 2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package catacomb

import (
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/juju/errors"
	"gopkg.in/tomb.v1"

	"github.com/juju/juju/worker"
)

// Catacomb is a variant of tomb.Tomb with its own internal goroutine, designed
// for coordinating the lifetimes of private workers needed by a single parent.
//
// As a client, you should only ever create zero values; these should be used
// with Invoke to manage a parent task. No Catacomb methods are meaningful
// until the catacomb has been started with a successful Invoke.
//
// See the package documentation for more detailed discussion and usage notes.
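//
// As a rough sketch of the conventional client pattern (the parentWorker
// type, its newParentWorker constructor and its loop method are illustrative
// names, not part of this package): the parent embeds a zero-value Catacomb,
// starts itself with Invoke, and forwards Kill and Wait to the catacomb.
//
//	type parentWorker struct {
//		catacomb catacomb.Catacomb
//	}
//
//	func newParentWorker() (*parentWorker, error) {
//		w := &parentWorker{}
//		err := catacomb.Invoke(catacomb.Plan{
//			Site: &w.catacomb,
//			Work: w.loop,
//		})
//		if err != nil {
//			return nil, errors.Trace(err)
//		}
//		return w, nil
//	}
//
//	func (w *parentWorker) Kill()       { w.catacomb.Kill(nil) }
//	func (w *parentWorker) Wait() error { return w.catacomb.Wait() }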
type Catacomb struct {
	tomb  tomb.Tomb
	wg    sync.WaitGroup
	adds  chan worker.Worker
	dirty int32
}

// Plan defines the strategy for an Invoke.
type Plan struct {

	// Site must point to an unused Catacomb.
	Site *Catacomb

	// Work will be run on a new goroutine, and tracked by Site.
	Work func() error

	// Init contains additional workers for which Site must be responsible.
	Init []worker.Worker
}

// Validate returns an error if the plan cannot be used. It doesn't check for
// reused catacombs: plan validity is necessary but not sufficient to determine
// that an Invoke will succeed.
func (plan Plan) Validate() error {
	if plan.Site == nil {
		return errors.NotValidf("nil Site")
	}
	if plan.Work == nil {
		return errors.NotValidf("nil Work")
	}
	for i, w := range plan.Init {
		if w == nil {
			return errors.NotValidf("nil Init item %d", i)
		}
	}
	return nil
}

// Invoke uses the plan's catacomb to run the work func. It will return an
// error if the plan is not valid, or if the catacomb has already been used.
// If Invoke returns no error, the catacomb is now controlling the work func,
// and its exported methods can be called safely.
//
// Invoke takes responsibility for all workers in plan.Init, *whether or not
// it succeeds*.
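//
// For example (a sketch only; w, child1 and child2 are hypothetical values
// owned by the caller):
//
//	err := catacomb.Invoke(catacomb.Plan{
//		Site: &w.catacomb,
//		Work: w.loop,
//		Init: []worker.Worker{child1, child2},
//	})
//	if err != nil {
//		// child1 and child2 have already been stopped; there is
//		// nothing left for the caller to clean up.
//		return nil, errors.Trace(err)
//	}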
func Invoke(plan Plan) (err error) {

	defer func() {
		if err != nil {
			stopWorkers(plan.Init)
		}
	}()

	if err := plan.Validate(); err != nil {
		return errors.Trace(err)
	}
	catacomb := plan.Site
	if !atomic.CompareAndSwapInt32(&catacomb.dirty, 0, 1) {
		return errors.Errorf("catacomb %p has already been used", catacomb)
	}
	catacomb.adds = make(chan worker.Worker)

	// Add the Init workers right away, so the client can't induce data races
	// by modifying the slice post-return.
	for _, w := range plan.Init {
		catacomb.add(w)
	}

	// This goroutine listens for added workers until the catacomb is Killed.
	// We ensure the wg can't complete until we know no new workers will be
	// added.
	catacomb.wg.Add(1)
	go func() {
		defer catacomb.wg.Done()
		for {
			select {
			case <-catacomb.tomb.Dying():
				return
			case w := <-catacomb.adds:
				catacomb.add(w)
			}
		}
	}()

	// This goroutine runs the work func and stops the catacomb with its error,
	// and waits for the listen goroutine and all added workers to complete
	// before marking the catacomb's tomb Dead.
	go func() {
		defer catacomb.tomb.Done()
		defer catacomb.wg.Wait()
		catacomb.Kill(runSafely(plan.Work))
	}()
	return nil
}

// stopWorkers stops all non-nil workers in the supplied slice, and swallows
// all errors. This is consistent, for now, because Catacomb swallows all
// errors but the first; as we come to rank or log errors, this must change
// to accommodate better practices.
func stopWorkers(workers []worker.Worker) {
	for _, w := range workers {
		if w != nil {
			worker.Stop(w)
		}
	}
}

// Add causes the supplied worker's lifetime to be bound to the catacomb's,
// relieving the client of responsibility for Kill()ing it and Wait()ing for an
// error, *whether or not this method succeeds*. If the method returns an error,
// it always indicates that the catacomb is shutting down; the value will either
// be the error from the (now-stopped) worker, or catacomb.ErrDying().
//
// If the worker completes without error, the catacomb will continue unaffected;
// otherwise the catacomb's tomb will be killed with the returned error. This
// allows clients to freely Kill() workers that have been Add()ed; any errors
// encountered will still kill the catacomb, so the workers stay under control
// until the last moment, and so can be managed pretty casually once they've
// been added.
//
// Don't try to add a worker to its own catacomb; that'll deadlock the shutdown
// procedure. I don't think there's much we can do about that.
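//
// A typical use, from inside the parent's work func (newChildWorker is a
// hypothetical constructor; w is the parent):
//
//	child, err := newChildWorker()
//	if err != nil {
//		return errors.Trace(err)
//	}
//	if err := w.catacomb.Add(child); err != nil {
//		return errors.Trace(err)
//	}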
func (catacomb *Catacomb) Add(w worker.Worker) error {
	select {
	case <-catacomb.tomb.Dying():
		if err := worker.Stop(w); err != nil {
			return errors.Trace(err)
		}
		return catacomb.ErrDying()
	case catacomb.adds <- w:
		// Note that we don't need to wait for confirmation here. This depends
		// on the catacomb.wg.Add() for the listen loop, which ensures the wg
		// won't complete until no more adds can be received.
		return nil
	}
}

// add starts two goroutines that (1) kill the catacomb's tomb with any
// error encountered by the worker; and (2) kill the worker when the
// catacomb starts dying.
func (catacomb *Catacomb) add(w worker.Worker) {
	// We must wait for _both_ goroutines to exit; they may finish in either
	// order, depending on whether the worker or the catacomb shuts down
	// first.
	catacomb.wg.Add(2)
	go func() {
		defer catacomb.wg.Done()
		if err := w.Wait(); err != nil {
			catacomb.Kill(err)
		}
	}()
	go func() {
		defer catacomb.wg.Done()
		<-catacomb.tomb.Dying()
		// Any error from the stopped worker is already reported by the
		// goroutine above, so it is safe to discard it here.
		worker.Stop(w)
	}()
}

// Dying returns a channel that will be closed when Kill is called.
func (catacomb *Catacomb) Dying() <-chan struct{} {
	return catacomb.tomb.Dying()
}

// Dead returns a channel that will be closed when the work started by Invoke
// has completed (and thus when subsequent calls to Wait() are known not to
// block).
func (catacomb *Catacomb) Dead() <-chan struct{} {
	return catacomb.tomb.Dead()
}

// Wait blocks until the work started by Invoke completes, and returns the
// first non-nil and non-tomb.ErrDying error passed to Kill before that point.
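//
// A typical external shutdown sequence (sketch, assuming the embedded-catacomb
// pattern shown above):
//
//	w.catacomb.Kill(nil)
//	err := w.catacomb.Wait()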
func (catacomb *Catacomb) Wait() error {
	return catacomb.tomb.Wait()
}

// Kill kills the Catacomb's internal tomb with the supplied error, or one
// derived from it.
//  * if it's caused by this catacomb's ErrDying, it passes on tomb.ErrDying.
//  * if it's tomb.ErrDying, or caused by another catacomb's ErrDying, it passes
//    on a new error complaining about the misuse.
//  * all other errors are passed on unmodified.
// It's always safe to call Kill, but errors passed to Kill after the catacomb
// is dead will be ignored.
func (catacomb *Catacomb) Kill(err error) {
	if err == tomb.ErrDying {
		err = errors.New("bad catacomb Kill: tomb.ErrDying")
	}
	cause := errors.Cause(err)
	if match, ok := cause.(dyingError); ok {
		if catacomb != match.catacomb {
			err = errors.Errorf("bad catacomb Kill: other catacomb's ErrDying")
		} else {
			err = tomb.ErrDying
		}
	}

	// TODO(fwereade) it's pretty clear that this ought to be a Kill(nil), and
	// the catacomb should be responsible for ranking errors, just like the
	// dependency.Engine does, rather than determining priority by scheduling
	// alone.
	catacomb.tomb.Kill(err)
}

// ErrDying returns an error that can be used to Kill *this* catacomb without
// overwriting nil errors. It should only be used when the catacomb is already
// known to be dying; calling this method at any other time will return a
// different error, indicating client misuse.
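//
// It is conventionally returned from the parent's work func once the Dying
// channel is closed, as in this sketch (the loop method, watcher and handle
// are illustrative, not part of this package):
//
//	func (w *parentWorker) loop() error {
//		for {
//			select {
//			case <-w.catacomb.Dying():
//				return w.catacomb.ErrDying()
//			case change := <-w.watcher.Changes():
//				if err := w.handle(change); err != nil {
//					return errors.Trace(err)
//				}
//			}
//		}
//	}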
func (catacomb *Catacomb) ErrDying() error {
	select {
	case <-catacomb.tomb.Dying():
		return dyingError{catacomb}
	default:
		return errors.New("bad catacomb ErrDying: still alive")
	}
}

// dyingError holds a reference to the catacomb that created it.
type dyingError struct {
	catacomb *Catacomb
}

// Error is part of the error interface.
func (err dyingError) Error() string {
	return fmt.Sprintf("catacomb %p is dying", err.catacomb)
}

// runSafely runs the supplied function and returns its error. If the function
// panics, the panic is recovered and returned as an error instead.
func runSafely(f func() error) (err error) {
	defer func() {
		if panicResult := recover(); panicResult != nil {
			err = errors.Errorf("panic resulted in: %v", panicResult)
		}
	}()
	return f()
}