github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/worker/instancepoller/updater.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package instancepoller
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/loggo"
    12  
    13  	"github.com/juju/juju/instance"
    14  	"github.com/juju/juju/network"
    15  	"github.com/juju/juju/state"
    16  	"github.com/juju/juju/state/watcher"
    17  )
    18  
    19  var logger = loggo.GetLogger("juju.worker.instanceupdater")
    20  
    21  // ShortPoll and LongPoll hold the polling intervals for the instance
    22  // updater. When a machine has no address or is not started, it will be
    23  // polled at ShortPoll intervals until it does, exponentially backing off
    24  // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll.
    25  //
    26  // When a machine has an address and is started LongPoll will be used to
    27  // check that the instance address or status has not changed.
    28  var (
    29  	ShortPoll        = 1 * time.Second
    30  	ShortPollBackoff = 2.0
    31  	LongPoll         = 15 * time.Minute
    32  )
    33  
    34  type machine interface {
    35  	Id() string
    36  	InstanceId() (instance.Id, error)
    37  	Addresses() []network.Address
    38  	SetAddresses(...network.Address) error
    39  	InstanceStatus() (string, error)
    40  	SetInstanceStatus(status string) error
    41  	String() string
    42  	Refresh() error
    43  	Life() state.Life
    44  	Status() (status state.Status, info string, data map[string]interface{}, err error)
    45  	IsManual() (bool, error)
    46  }
    47  
    48  type instanceInfo struct {
    49  	addresses []network.Address
    50  	status    string
    51  }
    52  
    53  type machineContext interface {
    54  	killAll(err error)
    55  	instanceInfo(id instance.Id) (instanceInfo, error)
    56  	dying() <-chan struct{}
    57  }
    58  
    59  type machineAddress struct {
    60  	machine   machine
    61  	addresses []network.Address
    62  }
    63  
    64  var _ machine = (*state.Machine)(nil)
    65  
    66  type machinesWatcher interface {
    67  	Changes() <-chan []string
    68  	Err() error
    69  	Stop() error
    70  }
    71  
    72  type updaterContext interface {
    73  	newMachineContext() machineContext
    74  	getMachine(id string) (machine, error)
    75  	dying() <-chan struct{}
    76  }
    77  
    78  type updater struct {
    79  	context     updaterContext
    80  	machines    map[string]chan struct{}
    81  	machineDead chan machine
    82  }
    83  
    84  // watchMachinesLoop watches for changes provided by the given
    85  // machinesWatcher and starts machine goroutines to deal
    86  // with them, using the provided newMachineContext
    87  // function to create the appropriate context for each new machine id.
    88  func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) {
    89  	p := &updater{
    90  		context:     context,
    91  		machines:    make(map[string]chan struct{}),
    92  		machineDead: make(chan machine),
    93  	}
    94  	defer func() {
    95  		if stopErr := w.Stop(); stopErr != nil {
    96  			if err == nil {
    97  				err = fmt.Errorf("error stopping watcher: %v", stopErr)
    98  			} else {
    99  				logger.Warningf("ignoring error when stopping watcher: %v", stopErr)
   100  			}
   101  		}
   102  		for len(p.machines) > 0 {
   103  			delete(p.machines, (<-p.machineDead).Id())
   104  		}
   105  	}()
   106  	for {
   107  		select {
   108  		case ids, ok := <-w.Changes():
   109  			if !ok {
   110  				return watcher.EnsureErr(w)
   111  			}
   112  			if err := p.startMachines(ids); err != nil {
   113  				return err
   114  			}
   115  		case m := <-p.machineDead:
   116  			delete(p.machines, m.Id())
   117  		case <-p.context.dying():
   118  			return nil
   119  		}
   120  	}
   121  }
   122  
   123  func (p *updater) startMachines(ids []string) error {
   124  	for _, id := range ids {
   125  		if c := p.machines[id]; c == nil {
   126  			// We don't know about the machine - start
   127  			// a goroutine to deal with it.
   128  			m, err := p.context.getMachine(id)
   129  			if errors.IsNotFound(err) {
   130  				logger.Warningf("watcher gave notification of non-existent machine %q", id)
   131  				continue
   132  			}
   133  			if err != nil {
   134  				return err
   135  			}
   136  			// We don't poll manual machines.
   137  			isManual, err := m.IsManual()
   138  			if err != nil {
   139  				return err
   140  			}
   141  			if isManual {
   142  				continue
   143  			}
   144  			c = make(chan struct{})
   145  			p.machines[id] = c
   146  			go runMachine(p.context.newMachineContext(), m, c, p.machineDead)
   147  		} else {
   148  			c <- struct{}{}
   149  		}
   150  	}
   151  	return nil
   152  }
   153  
   154  // runMachine processes the address and status publishing for a given machine.
   155  // We assume that the machine is alive when this is first called.
   156  func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) {
   157  	defer func() {
   158  		// We can't just send on the died channel because the
   159  		// central loop might be trying to write to us on the
   160  		// changed channel.
   161  		for {
   162  			select {
   163  			case died <- m:
   164  				return
   165  			case <-changed:
   166  			}
   167  		}
   168  	}()
   169  	if err := machineLoop(context, m, changed); err != nil {
   170  		context.killAll(err)
   171  	}
   172  }
   173  
   174  func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
   175  	// Use a short poll interval when initially waiting for
   176  	// a machine's address and machine agent to start, and a long one when it already
   177  	// has an address and the machine agent is started.
   178  	pollInterval := ShortPoll
   179  	pollInstance := true
   180  	for {
   181  		if pollInstance {
   182  			instInfo, err := pollInstanceInfo(context, m)
   183  			if err != nil && !errors.IsNotProvisioned(err) {
   184  				// If the provider doesn't implement Addresses/Status now,
   185  				// it never will until we're upgraded, so don't bother
   186  				// asking any more. We could use less resources
   187  				// by taking down the entire worker, but this is easier for now
   188  				// (and hopefully the local provider will implement
   189  				// Addresses/Status in the not-too-distant future),
   190  				// so we won't need to worry about this case at all.
   191  				if errors.IsNotImplemented(err) {
   192  					pollInterval = 365 * 24 * time.Hour
   193  				} else {
   194  					return err
   195  				}
   196  			}
   197  			machineStatus := state.StatusPending
   198  			if err == nil {
   199  				if machineStatus, _, _, err = m.Status(); err != nil {
   200  					logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
   201  				}
   202  			}
   203  			if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == state.StatusStarted {
   204  				// We've got at least one address and a status and instance is started, so poll infrequently.
   205  				pollInterval = LongPoll
   206  			} else if pollInterval < LongPoll {
   207  				// We have no addresses or not started - poll increasingly rarely
   208  				// until we do.
   209  				pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
   210  			}
   211  			pollInstance = false
   212  		}
   213  		select {
   214  		case <-time.After(pollInterval):
   215  			pollInstance = true
   216  		case <-context.dying():
   217  			return nil
   218  		case <-changed:
   219  			if err := m.Refresh(); err != nil {
   220  				return err
   221  			}
   222  			if m.Life() == state.Dead {
   223  				return nil
   224  			}
   225  		}
   226  	}
   227  }
   228  
   229  // pollInstanceInfo checks the current provider addresses and status
   230  // for the given machine's instance, and sets them on the machine if they've changed.
   231  func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) {
   232  	instInfo = instanceInfo{}
   233  	instId, err := m.InstanceId()
   234  	// We can't ask the machine for its addresses if it isn't provisioned yet.
   235  	if errors.IsNotProvisioned(err) {
   236  		return instInfo, err
   237  	}
   238  	if err != nil {
   239  		return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err)
   240  	}
   241  	instInfo, err = context.instanceInfo(instId)
   242  	if err != nil {
   243  		if errors.IsNotImplemented(err) {
   244  			return instInfo, err
   245  		}
   246  		logger.Warningf("cannot get instance info for instance %q: %v", instId, err)
   247  		return instInfo, nil
   248  	}
   249  	currentInstStatus, err := m.InstanceStatus()
   250  	if err != nil {
   251  		// This should never occur since the machine is provisioned.
   252  		// But just in case, we reset polled status so we try again next time.
   253  		logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err)
   254  		instInfo.status = ""
   255  	} else {
   256  		if instInfo.status != currentInstStatus {
   257  			logger.Infof("machine %q instance status changed from %q to %q", m.Id(), currentInstStatus, instInfo.status)
   258  			if err = m.SetInstanceStatus(instInfo.status); err != nil {
   259  				logger.Errorf("cannot set instance status on %q: %v", m, err)
   260  			}
   261  		}
   262  	}
   263  	if !addressesEqual(m.Addresses(), instInfo.addresses) {
   264  		logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses)
   265  		if err = m.SetAddresses(instInfo.addresses...); err != nil {
   266  			logger.Errorf("cannot set addresses on %q: %v", m, err)
   267  		}
   268  	}
   269  	return instInfo, err
   270  }
   271  
   272  // addressesEqual compares the addresses of the machine and the instance information.
   273  func addressesEqual(a0, a1 []network.Address) bool {
   274  	if len(a0) != len(a1) {
   275  		logger.Tracef("address lists have different lengths %d != %d for %v != %v",
   276  			len(a0), len(a1), a0, a1)
   277  		return false
   278  	}
   279  
   280  	ca0 := make([]network.Address, len(a0))
   281  	copy(ca0, a0)
   282  	network.SortAddresses(ca0, true)
   283  	ca1 := make([]network.Address, len(a1))
   284  	copy(ca1, a1)
   285  	network.SortAddresses(ca1, true)
   286  
   287  	for i := range ca0 {
   288  		if ca0[i] != ca1[i] {
   289  			logger.Tracef("address entry at offset %d has a different value for %v != %v",
   290  				i, ca0, ca1)
   291  			return false
   292  		}
   293  	}
   294  	return true
   295  }