github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/worker/instancepoller/updater.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package instancepoller
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/loggo"
    12  
    13  	"github.com/juju/juju/instance"
    14  	"github.com/juju/juju/state"
    15  	"github.com/juju/juju/state/api/params"
    16  	"github.com/juju/juju/state/watcher"
    17  )
    18  
// logger is the package-level logger. Note that it is registered under
// the name "juju.worker.instanceupdater", which differs from the package
// name (instancepoller).
var logger = loggo.GetLogger("juju.worker.instanceupdater")
    20  
// ShortPoll and LongPoll hold the polling intervals for the instance
// updater. When a machine has no address or is not started, polling
// starts from ShortPoll and backs off exponentially: after each poll the
// interval is multiplied by ShortPollBackoff for as long as it is still
// below LongPoll. The resulting maximum is therefore approximate - the
// interval may overshoot LongPoll by up to a factor of ShortPollBackoff.
//
// When a machine has an address and is started, LongPoll is used to
// check that the instance address or status has not changed.
var (
	ShortPoll        = 1 * time.Second
	ShortPollBackoff = 2.0
	LongPoll         = 15 * time.Minute
)
    33  
// machine is the subset of machine behaviour that the poller relies on.
// *state.Machine satisfies it (see the compile-time assertion in this
// file); keeping it an interface lets other implementations be
// substituted.
type machine interface {
	// Id is used as the key in updater.machines and in log messages.
	Id() string
	// InstanceId returns the provider instance id; pollInstanceInfo
	// checks its error with state.IsNotProvisionedError.
	InstanceId() (instance.Id, error)
	// Addresses returns the addresses currently recorded for the machine;
	// they are compared against the addresses reported for the instance.
	Addresses() []instance.Address
	// SetAddresses records new addresses for the machine.
	SetAddresses(...instance.Address) error
	// InstanceStatus returns the instance status currently recorded for
	// the machine.
	InstanceStatus() (string, error)
	// SetInstanceStatus records a new instance status for the machine.
	SetInstanceStatus(status string) error
	// String is used when formatting the machine in log messages.
	String() string
	// Refresh is called by machineLoop whenever a change notification
	// arrives, before Life is consulted.
	Refresh() error
	// Life is compared against state.Dead to decide when to stop polling.
	Life() state.Life
	// Status returns the machine status; machineLoop only switches to
	// LongPoll once it equals params.StatusStarted.
	Status() (status params.Status, info string, data params.StatusData, err error)
	// IsManual reports whether the machine is manual; startMachines
	// does not poll manual machines.
	IsManual() (bool, error)
}
    47  
// instanceInfo holds the addresses and status obtained for an instance
// via machineContext.instanceInfo. An empty status is used by
// pollInstanceInfo to mean "not known yet, try again".
type instanceInfo struct {
	addresses []instance.Address
	status    string
}
    52  
// machineContext is the environment handed to each per-machine goroutine.
type machineContext interface {
	// killAll is called by runMachine with the error returned from
	// machineLoop when that error is non-nil.
	killAll(err error)
	// instanceInfo returns the addresses and status for the instance
	// with the given id.
	instanceInfo(id instance.Id) (instanceInfo, error)
	// dying returns a channel; machineLoop returns nil as soon as a
	// receive from it succeeds.
	dying() <-chan struct{}
}
    58  
// machineAddress pairs a machine with a list of addresses.
// NOTE(review): nothing in the visible part of this file references this
// type - confirm whether it is still used elsewhere in the package.
type machineAddress struct {
	machine   machine
	addresses []instance.Address
}
    63  
// Compile-time check that *state.Machine implements the machine interface.
var _ machine = (*state.Machine)(nil)
    65  
// machinesWatcher is the watcher behaviour required by watchMachinesLoop.
type machinesWatcher interface {
	// Changes delivers batches of machine ids. watchMachinesLoop treats
	// the channel being closed as a watcher failure and returns
	// watcher.MustErr(w).
	Changes() <-chan []string
	// Err returns the watcher's error.
	// NOTE(review): presumably consulted by watcher.MustErr - confirm.
	Err() error
	// Stop is called by watchMachinesLoop when it exits.
	Stop() error
}
    71  
// updaterContext is the environment required by watchMachinesLoop and
// the updater it creates.
type updaterContext interface {
	// newMachineContext is called once for every machine goroutine that
	// startMachines launches.
	newMachineContext() machineContext
	// getMachine looks up a machine by id; an error satisfying
	// errors.IsNotFound makes startMachines skip that id.
	getMachine(id string) (machine, error)
	// dying returns a channel; watchMachinesLoop returns nil as soon as
	// a receive from it succeeds.
	dying() <-chan struct{}
}
    77  
// updater tracks the per-machine goroutines started by watchMachinesLoop.
type updater struct {
	context updaterContext
	// machines maps a machine id to the channel used to tell that
	// machine's goroutine that the machine has changed.
	machines map[string]chan struct{}
	// machineDead receives a machine when its goroutine has finished;
	// the entry is then removed from machines.
	machineDead chan machine
}
    83  
    84  // watchMachinesLoop watches for changes provided by the given
    85  // machinesWatcher and starts machine goroutines to deal
    86  // with them, using the provided newMachineContext
    87  // function to create the appropriate context for each new machine id.
    88  func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) {
    89  	p := &updater{
    90  		context:     context,
    91  		machines:    make(map[string]chan struct{}),
    92  		machineDead: make(chan machine),
    93  	}
    94  	defer func() {
    95  		if stopErr := w.Stop(); stopErr != nil {
    96  			if err == nil {
    97  				err = fmt.Errorf("error stopping watcher: %v", stopErr)
    98  			} else {
    99  				logger.Warningf("ignoring error when stopping watcher: %v", stopErr)
   100  			}
   101  		}
   102  		for len(p.machines) > 0 {
   103  			delete(p.machines, (<-p.machineDead).Id())
   104  		}
   105  	}()
   106  	for {
   107  		select {
   108  		case ids, ok := <-w.Changes():
   109  			if !ok {
   110  				return watcher.MustErr(w)
   111  			}
   112  			if err := p.startMachines(ids); err != nil {
   113  				return err
   114  			}
   115  		case m := <-p.machineDead:
   116  			delete(p.machines, m.Id())
   117  		case <-p.context.dying():
   118  			return nil
   119  		}
   120  	}
   121  }
   122  
   123  func (p *updater) startMachines(ids []string) error {
   124  	for _, id := range ids {
   125  		if c := p.machines[id]; c == nil {
   126  			// We don't know about the machine - start
   127  			// a goroutine to deal with it.
   128  			m, err := p.context.getMachine(id)
   129  			if errors.IsNotFound(err) {
   130  				logger.Warningf("watcher gave notification of non-existent machine %q", id)
   131  				continue
   132  			}
   133  			if err != nil {
   134  				return err
   135  			}
   136  			// We don't poll manual machines.
   137  			isManual, err := m.IsManual()
   138  			if err != nil {
   139  				return err
   140  			}
   141  			if isManual {
   142  				continue
   143  			}
   144  			c = make(chan struct{})
   145  			p.machines[id] = c
   146  			go runMachine(p.context.newMachineContext(), m, c, p.machineDead)
   147  		} else {
   148  			c <- struct{}{}
   149  		}
   150  	}
   151  	return nil
   152  }
   153  
   154  // runMachine processes the address and status publishing for a given machine.
   155  // We assume that the machine is alive when this is first called.
   156  func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) {
   157  	defer func() {
   158  		// We can't just send on the died channel because the
   159  		// central loop might be trying to write to us on the
   160  		// changed channel.
   161  		for {
   162  			select {
   163  			case died <- m:
   164  				return
   165  			case <-changed:
   166  			}
   167  		}
   168  	}()
   169  	if err := machineLoop(context, m, changed); err != nil {
   170  		context.killAll(err)
   171  	}
   172  }
   173  
   174  func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
   175  	// Use a short poll interval when initially waiting for
   176  	// a machine's address and machine agent to start, and a long one when it already
   177  	// has an address and the machine agent is started.
   178  	pollInterval := ShortPoll
   179  	pollInstance := true
   180  	for {
   181  		if pollInstance {
   182  			instInfo, err := pollInstanceInfo(context, m)
   183  			if err != nil && !state.IsNotProvisionedError(err) {
   184  				// If the provider doesn't implement Addresses/Status now,
   185  				// it never will until we're upgraded, so don't bother
   186  				// asking any more. We could use less resources
   187  				// by taking down the entire worker, but this is easier for now
   188  				// (and hopefully the local provider will implement
   189  				// Addresses/Status in the not-too-distant future),
   190  				// so we won't need to worry about this case at all.
   191  				if errors.IsNotImplemented(err) {
   192  					pollInterval = 365 * 24 * time.Hour
   193  				} else {
   194  					return err
   195  				}
   196  			}
   197  			machineStatus := params.StatusPending
   198  			if err == nil {
   199  				if machineStatus, _, _, err = m.Status(); err != nil {
   200  					logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
   201  				}
   202  			}
   203  			if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted {
   204  				// We've got at least one address and a status and instance is started, so poll infrequently.
   205  				pollInterval = LongPoll
   206  			} else if pollInterval < LongPoll {
   207  				// We have no addresses or not started - poll increasingly rarely
   208  				// until we do.
   209  				pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
   210  			}
   211  			pollInstance = false
   212  		}
   213  		select {
   214  		case <-time.After(pollInterval):
   215  			pollInstance = true
   216  		case <-context.dying():
   217  			return nil
   218  		case <-changed:
   219  			if err := m.Refresh(); err != nil {
   220  				return err
   221  			}
   222  			if m.Life() == state.Dead {
   223  				return nil
   224  			}
   225  		}
   226  	}
   227  }
   228  
   229  // pollInstanceInfo checks the current provider addresses and status
   230  // for the given machine's instance, and sets them on the machine if they've changed.
   231  func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) {
   232  	instInfo = instanceInfo{}
   233  	instId, err := m.InstanceId()
   234  	// We can't ask the machine for its addresses if it isn't provisioned yet.
   235  	if state.IsNotProvisionedError(err) {
   236  		return instInfo, err
   237  	}
   238  	if err != nil {
   239  		return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err)
   240  	}
   241  	instInfo, err = context.instanceInfo(instId)
   242  	if err != nil {
   243  		if errors.IsNotImplemented(err) {
   244  			return instInfo, err
   245  		}
   246  		logger.Warningf("cannot get instance info for instance %q: %v", instId, err)
   247  		return instInfo, nil
   248  	}
   249  	currentInstStatus, err := m.InstanceStatus()
   250  	if err != nil {
   251  		// This should never occur since the machine is provisioned.
   252  		// But just in case, we reset polled status so we try again next time.
   253  		logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err)
   254  		instInfo.status = ""
   255  	} else {
   256  		if instInfo.status != currentInstStatus {
   257  			logger.Infof("machine %q has new instance status: %v", m.Id(), instInfo.status)
   258  			if err = m.SetInstanceStatus(instInfo.status); err != nil {
   259  				logger.Errorf("cannot set instance status on %q: %v", m, err)
   260  			}
   261  		}
   262  	}
   263  	if !addressesEqual(m.Addresses(), instInfo.addresses) {
   264  		logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses)
   265  		if err = m.SetAddresses(instInfo.addresses...); err != nil {
   266  			logger.Errorf("cannot set addresses on %q: %v", m, err)
   267  		}
   268  	}
   269  	return instInfo, err
   270  }
   271  
   272  func addressesEqual(a0, a1 []instance.Address) bool {
   273  	if len(a0) != len(a1) {
   274  		return false
   275  	}
   276  	for i := range a0 {
   277  		if a0[i] != a1[i] {
   278  			return false
   279  		}
   280  	}
   281  	return true
   282  }