github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/worker/instancepoller/updater.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package instancepoller
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/loggo"
    11  	"github.com/juju/names"
    12  
    13  	"github.com/juju/juju/apiserver/params"
    14  	"github.com/juju/juju/instance"
    15  	"github.com/juju/juju/network"
    16  	"github.com/juju/juju/state/watcher"
    17  )
    18  
// logger is the package-level logger used by every function in this file.
// Note that it is registered under the name "juju.worker.instanceupdater",
// which differs from the package name (instancepoller).
var logger = loggo.GetLogger("juju.worker.instanceupdater")
    20  
    21  // ShortPoll and LongPoll hold the polling intervals for the instance
    22  // updater. When a machine has no address or is not started, it will be
    23  // polled at ShortPoll intervals until it does, exponentially backing off
    24  // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll.
    25  //
    26  // When a machine has an address and is started LongPoll will be used to
    27  // check that the instance address or status has not changed.
    28  var (
    29  	ShortPoll        = 1 * time.Second
    30  	ShortPollBackoff = 2.0
    31  	LongPoll         = 15 * time.Minute
    32  )
    33  
    34  type machine interface {
    35  	Id() string
    36  	Tag() names.MachineTag
    37  	InstanceId() (instance.Id, error)
    38  	ProviderAddresses() ([]network.Address, error)
    39  	SetProviderAddresses(...network.Address) error
    40  	InstanceStatus() (string, error)
    41  	SetInstanceStatus(status string) error
    42  	String() string
    43  	Refresh() error
    44  	Life() params.Life
    45  	Status() (params.StatusResult, error)
    46  	IsManual() (bool, error)
    47  }
    48  
// instanceInfo is the result of polling an instance: the addresses and
// the status reported for it. The zero value (no addresses, empty status)
// is what pollInstanceInfo returns when nothing could be obtained.
type instanceInfo struct {
	// addresses holds the addresses reported for the instance.
	addresses []network.Address
	// status holds the reported instance status; pollInstanceInfo resets
	// it to "" when the machine's current status cannot be read.
	status    string
}
    53  
    54  type machineContext interface {
    55  	killAll(err error)
    56  	instanceInfo(id instance.Id) (instanceInfo, error)
    57  	dying() <-chan struct{}
    58  }
    59  
// machineAddress pairs a machine with a list of addresses.
// NOTE(review): nothing in this file references this type; confirm whether
// it is still used elsewhere in the package.
type machineAddress struct {
	machine   machine
	addresses []network.Address
}
    64  
    65  type machinesWatcher interface {
    66  	Changes() <-chan []string
    67  	Err() error
    68  	Stop() error
    69  }
    70  
    71  type updaterContext interface {
    72  	newMachineContext() machineContext
    73  	getMachine(tag names.MachineTag) (machine, error)
    74  	dying() <-chan struct{}
    75  }
    76  
// updater holds the bookkeeping used by watchMachinesLoop.
type updater struct {
	// context supplies machines and per-machine contexts.
	context     updaterContext
	// machines maps every machine that has a running goroutine to the
	// channel used to notify that goroutine of changes.
	machines    map[names.MachineTag]chan struct{}
	// machineDead is the channel on which a machine goroutine sends its
	// machine when it exits, so the entry can be removed from machines.
	machineDead chan machine
}
    82  
    83  // watchMachinesLoop watches for changes provided by the given
    84  // machinesWatcher and starts machine goroutines to deal with them,
    85  // using the provided newMachineContext function to create the
    86  // appropriate context for each new machine tag.
    87  func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) {
    88  	p := &updater{
    89  		context:     context,
    90  		machines:    make(map[names.MachineTag]chan struct{}),
    91  		machineDead: make(chan machine),
    92  	}
    93  	defer func() {
    94  		if stopErr := w.Stop(); stopErr != nil {
    95  			if err == nil {
    96  				err = fmt.Errorf("error stopping watcher: %v", stopErr)
    97  			} else {
    98  				logger.Warningf("ignoring error when stopping watcher: %v", stopErr)
    99  			}
   100  		}
   101  		for len(p.machines) > 0 {
   102  			delete(p.machines, (<-p.machineDead).Tag())
   103  		}
   104  	}()
   105  	for {
   106  		select {
   107  		case ids, ok := <-w.Changes():
   108  			if !ok {
   109  				return watcher.EnsureErr(w)
   110  			}
   111  			tags := make([]names.MachineTag, len(ids))
   112  			for i := range ids {
   113  				tags[i] = names.NewMachineTag(ids[i])
   114  			}
   115  			if err := p.startMachines(tags); err != nil {
   116  				return err
   117  			}
   118  		case m := <-p.machineDead:
   119  			delete(p.machines, m.Tag())
   120  		case <-p.context.dying():
   121  			return nil
   122  		}
   123  	}
   124  }
   125  
   126  func (p *updater) startMachines(tags []names.MachineTag) error {
   127  	for _, tag := range tags {
   128  		if c := p.machines[tag]; c == nil {
   129  			// We don't know about the machine - start
   130  			// a goroutine to deal with it.
   131  			m, err := p.context.getMachine(tag)
   132  			if params.IsCodeNotFound(err) {
   133  				logger.Warningf("watcher gave notification of non-existent machine %q", tag.Id())
   134  				continue
   135  			}
   136  			if err != nil {
   137  				return err
   138  			}
   139  			// We don't poll manual machines.
   140  			isManual, err := m.IsManual()
   141  			if err != nil {
   142  				return err
   143  			}
   144  			if isManual {
   145  				continue
   146  			}
   147  			c = make(chan struct{})
   148  			p.machines[tag] = c
   149  			go runMachine(p.context.newMachineContext(), m, c, p.machineDead)
   150  		} else {
   151  			c <- struct{}{}
   152  		}
   153  	}
   154  	return nil
   155  }
   156  
   157  // runMachine processes the address and status publishing for a given machine.
   158  // We assume that the machine is alive when this is first called.
   159  func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) {
   160  	defer func() {
   161  		// We can't just send on the died channel because the
   162  		// central loop might be trying to write to us on the
   163  		// changed channel.
   164  		for {
   165  			select {
   166  			case died <- m:
   167  				return
   168  			case <-changed:
   169  			}
   170  		}
   171  	}()
   172  	if err := machineLoop(context, m, changed); err != nil {
   173  		context.killAll(err)
   174  	}
   175  }
   176  
   177  func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
   178  	// Use a short poll interval when initially waiting for
   179  	// a machine's address and machine agent to start, and a long one when it already
   180  	// has an address and the machine agent is started.
   181  	pollInterval := ShortPoll
   182  	pollInstance := true
   183  	for {
   184  		if pollInstance {
   185  			instInfo, err := pollInstanceInfo(context, m)
   186  			if err != nil && !params.IsCodeNotProvisioned(err) {
   187  				// If the provider doesn't implement Addresses/Status now,
   188  				// it never will until we're upgraded, so don't bother
   189  				// asking any more. We could use less resources
   190  				// by taking down the entire worker, but this is easier for now
   191  				// (and hopefully the local provider will implement
   192  				// Addresses/Status in the not-too-distant future),
   193  				// so we won't need to worry about this case at all.
   194  				if params.IsCodeNotImplemented(err) {
   195  					pollInterval = 365 * 24 * time.Hour
   196  				} else {
   197  					return err
   198  				}
   199  			}
   200  			machineStatus := params.StatusPending
   201  			if err == nil {
   202  				if statusInfo, err := m.Status(); err != nil {
   203  					logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
   204  				} else {
   205  					machineStatus = statusInfo.Status
   206  				}
   207  			}
   208  			if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted {
   209  				// We've got at least one address and a status and instance is started, so poll infrequently.
   210  				pollInterval = LongPoll
   211  			} else if pollInterval < LongPoll {
   212  				// We have no addresses or not started - poll increasingly rarely
   213  				// until we do.
   214  				pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
   215  			}
   216  			pollInstance = false
   217  		}
   218  		select {
   219  		case <-time.After(pollInterval):
   220  			pollInstance = true
   221  		case <-context.dying():
   222  			return nil
   223  		case <-changed:
   224  			if err := m.Refresh(); err != nil {
   225  				return err
   226  			}
   227  			if m.Life() == params.Dead {
   228  				return nil
   229  			}
   230  		}
   231  	}
   232  }
   233  
   234  // pollInstanceInfo checks the current provider addresses and status
   235  // for the given machine's instance, and sets them on the machine if they've changed.
   236  func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) {
   237  	instInfo = instanceInfo{}
   238  	instId, err := m.InstanceId()
   239  	// We can't ask the machine for its addresses if it isn't provisioned yet.
   240  	if params.IsCodeNotProvisioned(err) {
   241  		return instInfo, err
   242  	}
   243  	if err != nil {
   244  		return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err)
   245  	}
   246  	instInfo, err = context.instanceInfo(instId)
   247  	if err != nil {
   248  		if params.IsCodeNotImplemented(err) {
   249  			return instInfo, err
   250  		}
   251  		logger.Warningf("cannot get instance info for instance %q: %v", instId, err)
   252  		return instInfo, nil
   253  	}
   254  	currentInstStatus, err := m.InstanceStatus()
   255  	if err != nil {
   256  		// This should never occur since the machine is provisioned.
   257  		// But just in case, we reset polled status so we try again next time.
   258  		logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err)
   259  		instInfo.status = ""
   260  	} else {
   261  		if instInfo.status != currentInstStatus {
   262  			logger.Infof("machine %q instance status changed from %q to %q", m.Id(), currentInstStatus, instInfo.status)
   263  			if err = m.SetInstanceStatus(instInfo.status); err != nil {
   264  				logger.Errorf("cannot set instance status on %q: %v", m, err)
   265  			}
   266  		}
   267  	}
   268  	providerAddresses, err := m.ProviderAddresses()
   269  	if err != nil {
   270  		return instInfo, err
   271  	}
   272  	if !addressesEqual(providerAddresses, instInfo.addresses) {
   273  		logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses)
   274  		if err = m.SetProviderAddresses(instInfo.addresses...); err != nil {
   275  			logger.Errorf("cannot set addresses on %q: %v", m, err)
   276  		}
   277  	}
   278  	return instInfo, err
   279  }
   280  
   281  // addressesEqual compares the addresses of the machine and the instance information.
   282  func addressesEqual(a0, a1 []network.Address) bool {
   283  	if len(a0) != len(a1) {
   284  		logger.Tracef("address lists have different lengths %d != %d for %v != %v",
   285  			len(a0), len(a1), a0, a1)
   286  		return false
   287  	}
   288  
   289  	ca0 := make([]network.Address, len(a0))
   290  	copy(ca0, a0)
   291  	network.SortAddresses(ca0, true)
   292  	ca1 := make([]network.Address, len(a1))
   293  	copy(ca1, a1)
   294  	network.SortAddresses(ca1, true)
   295  
   296  	for i := range ca0 {
   297  		if ca0[i] != ca1[i] {
   298  			logger.Tracef("address entry at offset %d has a different value for %v != %v",
   299  				i, ca0, ca1)
   300  			return false
   301  		}
   302  	}
   303  	return true
   304  }