github.com/rogpeppe/juju@v0.0.0-20140613142852-6337964b789e/worker/instancepoller/updater.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package instancepoller
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/loggo"
    12  
    13  	"github.com/juju/juju/instance"
    14  	"github.com/juju/juju/network"
    15  	"github.com/juju/juju/state"
    16  	"github.com/juju/juju/state/api/params"
    17  	"github.com/juju/juju/state/watcher"
    18  )
    19  
    20  var logger = loggo.GetLogger("juju.worker.instanceupdater")
    21  
    22  // ShortPoll and LongPoll hold the polling intervals for the instance
    23  // updater. When a machine has no address or is not started, it will be
    24  // polled at ShortPoll intervals until it does, exponentially backing off
    25  // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll.
    26  //
    27  // When a machine has an address and is started LongPoll will be used to
    28  // check that the instance address or status has not changed.
    29  var (
    30  	ShortPoll        = 1 * time.Second
    31  	ShortPollBackoff = 2.0
    32  	LongPoll         = 15 * time.Minute
    33  )
    34  
    35  type machine interface {
    36  	Id() string
    37  	InstanceId() (instance.Id, error)
    38  	Addresses() []network.Address
    39  	SetAddresses(...network.Address) error
    40  	InstanceStatus() (string, error)
    41  	SetInstanceStatus(status string) error
    42  	String() string
    43  	Refresh() error
    44  	Life() state.Life
    45  	Status() (status params.Status, info string, data params.StatusData, err error)
    46  	IsManual() (bool, error)
    47  }
    48  
    49  type instanceInfo struct {
    50  	addresses []network.Address
    51  	status    string
    52  }
    53  
    54  type machineContext interface {
    55  	killAll(err error)
    56  	instanceInfo(id instance.Id) (instanceInfo, error)
    57  	dying() <-chan struct{}
    58  }
    59  
    60  type machineAddress struct {
    61  	machine   machine
    62  	addresses []network.Address
    63  }
    64  
    65  var _ machine = (*state.Machine)(nil)
    66  
    67  type machinesWatcher interface {
    68  	Changes() <-chan []string
    69  	Err() error
    70  	Stop() error
    71  }
    72  
    73  type updaterContext interface {
    74  	newMachineContext() machineContext
    75  	getMachine(id string) (machine, error)
    76  	dying() <-chan struct{}
    77  }
    78  
    79  type updater struct {
    80  	context     updaterContext
    81  	machines    map[string]chan struct{}
    82  	machineDead chan machine
    83  }
    84  
    85  // watchMachinesLoop watches for changes provided by the given
    86  // machinesWatcher and starts machine goroutines to deal
    87  // with them, using the provided newMachineContext
    88  // function to create the appropriate context for each new machine id.
    89  func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) {
    90  	p := &updater{
    91  		context:     context,
    92  		machines:    make(map[string]chan struct{}),
    93  		machineDead: make(chan machine),
    94  	}
    95  	defer func() {
    96  		if stopErr := w.Stop(); stopErr != nil {
    97  			if err == nil {
    98  				err = fmt.Errorf("error stopping watcher: %v", stopErr)
    99  			} else {
   100  				logger.Warningf("ignoring error when stopping watcher: %v", stopErr)
   101  			}
   102  		}
   103  		for len(p.machines) > 0 {
   104  			delete(p.machines, (<-p.machineDead).Id())
   105  		}
   106  	}()
   107  	for {
   108  		select {
   109  		case ids, ok := <-w.Changes():
   110  			if !ok {
   111  				return watcher.MustErr(w)
   112  			}
   113  			if err := p.startMachines(ids); err != nil {
   114  				return err
   115  			}
   116  		case m := <-p.machineDead:
   117  			delete(p.machines, m.Id())
   118  		case <-p.context.dying():
   119  			return nil
   120  		}
   121  	}
   122  }
   123  
   124  func (p *updater) startMachines(ids []string) error {
   125  	for _, id := range ids {
   126  		if c := p.machines[id]; c == nil {
   127  			// We don't know about the machine - start
   128  			// a goroutine to deal with it.
   129  			m, err := p.context.getMachine(id)
   130  			if errors.IsNotFound(err) {
   131  				logger.Warningf("watcher gave notification of non-existent machine %q", id)
   132  				continue
   133  			}
   134  			if err != nil {
   135  				return err
   136  			}
   137  			// We don't poll manual machines.
   138  			isManual, err := m.IsManual()
   139  			if err != nil {
   140  				return err
   141  			}
   142  			if isManual {
   143  				continue
   144  			}
   145  			c = make(chan struct{})
   146  			p.machines[id] = c
   147  			go runMachine(p.context.newMachineContext(), m, c, p.machineDead)
   148  		} else {
   149  			c <- struct{}{}
   150  		}
   151  	}
   152  	return nil
   153  }
   154  
   155  // runMachine processes the address and status publishing for a given machine.
   156  // We assume that the machine is alive when this is first called.
   157  func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) {
   158  	defer func() {
   159  		// We can't just send on the died channel because the
   160  		// central loop might be trying to write to us on the
   161  		// changed channel.
   162  		for {
   163  			select {
   164  			case died <- m:
   165  				return
   166  			case <-changed:
   167  			}
   168  		}
   169  	}()
   170  	if err := machineLoop(context, m, changed); err != nil {
   171  		context.killAll(err)
   172  	}
   173  }
   174  
   175  func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
   176  	// Use a short poll interval when initially waiting for
   177  	// a machine's address and machine agent to start, and a long one when it already
   178  	// has an address and the machine agent is started.
   179  	pollInterval := ShortPoll
   180  	pollInstance := true
   181  	for {
   182  		if pollInstance {
   183  			instInfo, err := pollInstanceInfo(context, m)
   184  			if err != nil && !state.IsNotProvisionedError(err) {
   185  				// If the provider doesn't implement Addresses/Status now,
   186  				// it never will until we're upgraded, so don't bother
   187  				// asking any more. We could use less resources
   188  				// by taking down the entire worker, but this is easier for now
   189  				// (and hopefully the local provider will implement
   190  				// Addresses/Status in the not-too-distant future),
   191  				// so we won't need to worry about this case at all.
   192  				if errors.IsNotImplemented(err) {
   193  					pollInterval = 365 * 24 * time.Hour
   194  				} else {
   195  					return err
   196  				}
   197  			}
   198  			machineStatus := params.StatusPending
   199  			if err == nil {
   200  				if machineStatus, _, _, err = m.Status(); err != nil {
   201  					logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
   202  				}
   203  			}
   204  			if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted {
   205  				// We've got at least one address and a status and instance is started, so poll infrequently.
   206  				pollInterval = LongPoll
   207  			} else if pollInterval < LongPoll {
   208  				// We have no addresses or not started - poll increasingly rarely
   209  				// until we do.
   210  				pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
   211  			}
   212  			pollInstance = false
   213  		}
   214  		select {
   215  		case <-time.After(pollInterval):
   216  			pollInstance = true
   217  		case <-context.dying():
   218  			return nil
   219  		case <-changed:
   220  			if err := m.Refresh(); err != nil {
   221  				return err
   222  			}
   223  			if m.Life() == state.Dead {
   224  				return nil
   225  			}
   226  		}
   227  	}
   228  }
   229  
   230  // pollInstanceInfo checks the current provider addresses and status
   231  // for the given machine's instance, and sets them on the machine if they've changed.
   232  func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) {
   233  	instInfo = instanceInfo{}
   234  	instId, err := m.InstanceId()
   235  	// We can't ask the machine for its addresses if it isn't provisioned yet.
   236  	if state.IsNotProvisionedError(err) {
   237  		return instInfo, err
   238  	}
   239  	if err != nil {
   240  		return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err)
   241  	}
   242  	instInfo, err = context.instanceInfo(instId)
   243  	if err != nil {
   244  		if errors.IsNotImplemented(err) {
   245  			return instInfo, err
   246  		}
   247  		logger.Warningf("cannot get instance info for instance %q: %v", instId, err)
   248  		return instInfo, nil
   249  	}
   250  	currentInstStatus, err := m.InstanceStatus()
   251  	if err != nil {
   252  		// This should never occur since the machine is provisioned.
   253  		// But just in case, we reset polled status so we try again next time.
   254  		logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err)
   255  		instInfo.status = ""
   256  	} else {
   257  		if instInfo.status != currentInstStatus {
   258  			logger.Infof("machine %q has new instance status: %v", m.Id(), instInfo.status)
   259  			if err = m.SetInstanceStatus(instInfo.status); err != nil {
   260  				logger.Errorf("cannot set instance status on %q: %v", m, err)
   261  			}
   262  		}
   263  	}
   264  	if !addressesEqual(m.Addresses(), instInfo.addresses) {
   265  		logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses)
   266  		if err = m.SetAddresses(instInfo.addresses...); err != nil {
   267  			logger.Errorf("cannot set addresses on %q: %v", m, err)
   268  		}
   269  	}
   270  	return instInfo, err
   271  }
   272  
   273  func addressesEqual(a0, a1 []network.Address) bool {
   274  	if len(a0) != len(a1) {
   275  		return false
   276  	}
   277  	for i := range a0 {
   278  		if a0[i] != a1[i] {
   279  			return false
   280  		}
   281  	}
   282  	return true
   283  }