launchpad.net/~rogpeppe/juju-core/500-errgo-fix@v0.0.0-20140213181702-000000002356/worker/instancepoller/updater.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package instancepoller
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/loggo/loggo"
    10  
    11  	errgo "launchpad.net/errgo/errors"
    12  	"launchpad.net/juju-core/errors"
    13  	"launchpad.net/juju-core/instance"
    14  	"launchpad.net/juju-core/state"
    15  	"launchpad.net/juju-core/state/api/params"
    16  	"launchpad.net/juju-core/state/watcher"
    17  )
    18  
    19  var logger = loggo.GetLogger("juju.worker.instanceupdater")
    20  
    21  var mask = errgo.Mask
    22  
    23  // ShortPoll and LongPoll hold the polling intervals for the instance
    24  // updater. When a machine has no address or is not started, it will be
    25  // polled at ShortPoll intervals until it does, exponentially backing off
    26  // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll.
    27  //
    28  // When a machine has an address and is started LongPoll will be used to
    29  // check that the instance address or status has not changed.
    30  var (
    31  	ShortPoll        = 1 * time.Second
    32  	ShortPollBackoff = 2.0
    33  	LongPoll         = 15 * time.Minute
    34  )
    35  
    36  type machine interface {
    37  	Id() string
    38  	InstanceId() (instance.Id, error)
    39  	Addresses() []instance.Address
    40  	SetAddresses([]instance.Address) error
    41  	InstanceStatus() (string, error)
    42  	SetInstanceStatus(status string) error
    43  	String() string
    44  	Refresh() error
    45  	Life() state.Life
    46  	Status() (status params.Status, info string, data params.StatusData, err error)
    47  }
    48  
// instanceInfo holds what machineContext.instanceInfo reports for an
// instance: its addresses and its status string. pollInstanceInfo
// resets status to "" when the machine's currently recorded instance
// status cannot be read.
type instanceInfo struct {
	addresses []instance.Address
	status    string
}
    53  
    54  type machineContext interface {
    55  	killAll(err error)
    56  	instanceInfo(id instance.Id) (instanceInfo, error)
    57  	dying() <-chan struct{}
    58  }
    59  
// machineAddress pairs a machine with a list of addresses.
// NOTE(review): nothing in this file references this type; presumably
// it is used elsewhere in the package - confirm before removing.
type machineAddress struct {
	machine   machine
	addresses []instance.Address
}
    64  
// Compile-time check that *state.Machine satisfies the machine interface.
var _ machine = (*state.Machine)(nil)
    66  
    67  type machinesWatcher interface {
    68  	Changes() <-chan []string
    69  	Err() error
    70  	Stop() error
    71  }
    72  
    73  type updaterContext interface {
    74  	newMachineContext() machineContext
    75  	getMachine(id string) (machine, error)
    76  	dying() <-chan struct{}
    77  }
    78  
// updater holds the state of the central watch loop.
//
// machines maps the id of every machine that has a running goroutine
// to the channel used to tell that goroutine the machine has changed.
// machineDead is the channel on which a machine goroutine announces
// that it has finished, so its entry can be removed from machines.
type updater struct {
	context     updaterContext
	machines    map[string]chan struct{}
	machineDead chan machine
}
    84  
    85  // watchMachinesLoop watches for changes provided by the given
    86  // machinesWatcher and starts machine goroutines to deal
    87  // with them, using the provided newMachineContext
    88  // function to create the appropriate context for each new machine id.
    89  func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) {
    90  	p := &updater{
    91  		context:     context,
    92  		machines:    make(map[string]chan struct{}),
    93  		machineDead: make(chan machine),
    94  	}
    95  	defer func() {
    96  		if stopErr := w.Stop(); stopErr != nil {
    97  			if err == nil {
    98  				err = errgo.Newf("error stopping watcher: %v", stopErr)
    99  			} else {
   100  				logger.Warningf("ignoring error when stopping watcher: %v", stopErr)
   101  			}
   102  		}
   103  		for len(p.machines) > 0 {
   104  			delete(p.machines, (<-p.machineDead).Id())
   105  		}
   106  	}()
   107  	for {
   108  		select {
   109  		case ids, ok := <-w.Changes():
   110  			if !ok {
   111  				return watcher.MustErr(w)
   112  			}
   113  			if err := p.startMachines(ids); err != nil {
   114  				return mask(err)
   115  			}
   116  		case m := <-p.machineDead:
   117  			delete(p.machines, m.Id())
   118  		case <-p.context.dying():
   119  			return nil
   120  		}
   121  	}
   122  }
   123  
   124  func (p *updater) startMachines(ids []string) error {
   125  	for _, id := range ids {
   126  		if c := p.machines[id]; c == nil {
   127  			// We don't know about the machine - start
   128  			// a goroutine to deal with it.
   129  			m, err := p.context.getMachine(id)
   130  			if errors.IsNotFoundError(err) {
   131  				logger.Warningf("watcher gave notification of non-existent machine %q", id)
   132  				continue
   133  			}
   134  			if err != nil {
   135  				return mask(err)
   136  			}
   137  			c = make(chan struct{})
   138  			p.machines[id] = c
   139  			go runMachine(p.context.newMachineContext(), m, c, p.machineDead)
   140  		} else {
   141  			c <- struct{}{}
   142  		}
   143  	}
   144  	return nil
   145  }
   146  
   147  // runMachine processes the address and status publishing for a given machine.
   148  // We assume that the machine is alive when this is first called.
   149  func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) {
   150  	defer func() {
   151  		// We can't just send on the died channel because the
   152  		// central loop might be trying to write to us on the
   153  		// changed channel.
   154  		for {
   155  			select {
   156  			case died <- m:
   157  				return
   158  			case <-changed:
   159  			}
   160  		}
   161  	}()
   162  	if err := machineLoop(context, m, changed); err != nil {
   163  		context.killAll(err)
   164  	}
   165  }
   166  
   167  func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
   168  	// Use a short poll interval when initially waiting for
   169  	// a machine's address and machine agent to start, and a long one when it already
   170  	// has an address and the machine agent is started.
   171  	pollInterval := ShortPoll
   172  	pollInstance := true
   173  	for {
   174  		if pollInstance {
   175  			instInfo, err := pollInstanceInfo(context, m)
   176  			if err != nil {
   177  				// If the provider doesn't implement Addresses/Status now,
   178  				// it never will until we're upgraded, so don't bother
   179  				// asking any more. We could use less resources
   180  				// by taking down the entire worker, but this is easier for now
   181  				// (and hopefully the local provider will implement
   182  				// Addresses/Status in the not-too-distant future),
   183  				// so we won't need to worry about this case at all.
   184  				if errors.IsNotImplementedError(err) {
   185  					pollInterval = 365 * 24 * time.Hour
   186  				} else {
   187  					return err
   188  				}
   189  			}
   190  			machineStatus, _, _, err := m.Status()
   191  			if err != nil {
   192  				logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
   193  			}
   194  			if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted {
   195  				// We've got at least one address and a status and instance is started, so poll infrequently.
   196  				pollInterval = LongPoll
   197  			} else if pollInterval < LongPoll {
   198  				// We have no addresses or not started - poll increasingly rarely
   199  				// until we do.
   200  				pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
   201  			}
   202  			pollInstance = false
   203  		}
   204  		select {
   205  		case <-time.After(pollInterval):
   206  			pollInstance = true
   207  		case <-context.dying():
   208  			return nil
   209  		case <-changed:
   210  			if err := m.Refresh(); err != nil {
   211  				return mask(err)
   212  			}
   213  			if m.Life() == state.Dead {
   214  				return nil
   215  			}
   216  		}
   217  	}
   218  }
   219  
   220  // pollInstanceInfo checks the current provider addresses and status
   221  // for the given machine's instance, and sets them on the machine if they've changed.
   222  func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) {
   223  	instInfo = instanceInfo{}
   224  	instId, err := m.InstanceId()
   225  	if err != nil && !state.IsNotProvisionedError(err) {
   226  		return instInfo, errgo.Notef(err, "cannot get machine's instance id")
   227  	}
   228  	instInfo, err = context.instanceInfo(instId)
   229  	if err != nil {
   230  		if errors.IsNotImplementedError(err) {
   231  			return instInfo, err
   232  		}
   233  		logger.Warningf("cannot get instance info for instance %q: %v", instId, err)
   234  		return instInfo, nil
   235  	}
   236  	currentInstStatus, err := m.InstanceStatus()
   237  	if err != nil {
   238  		// This should never occur since the machine is provisioned.
   239  		// But just in case, we reset polled status so we try again next time.
   240  		logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err)
   241  		instInfo.status = ""
   242  	} else {
   243  		if instInfo.status != currentInstStatus {
   244  			logger.Infof("machine %q has new instance status: %v", m.Id(), instInfo.status)
   245  			if err = m.SetInstanceStatus(instInfo.status); err != nil {
   246  				logger.Errorf("cannot set instance status on %q: %v", m, err)
   247  			}
   248  		}
   249  	}
   250  	if !addressesEqual(m.Addresses(), instInfo.addresses) {
   251  		logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses)
   252  		if err = m.SetAddresses(instInfo.addresses); err != nil {
   253  			logger.Errorf("cannot set addresses on %q: %v", m, err)
   254  		}
   255  	}
   256  	return instInfo, err
   257  }
   258  
   259  func addressesEqual(a0, a1 []instance.Address) bool {
   260  	if len(a0) != len(a1) {
   261  		return false
   262  	}
   263  	for i := range a0 {
   264  		if a0[i] != a1[i] {
   265  			return false
   266  		}
   267  	}
   268  	return true
   269  }