github.com/cloudbase/juju-core@v0.0.0-20140504232958-a7271ac7912f/worker/instancepoller/updater.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package instancepoller
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/loggo"
    11  
    12  	"launchpad.net/juju-core/errors"
    13  	"launchpad.net/juju-core/instance"
    14  	"launchpad.net/juju-core/state"
    15  	"launchpad.net/juju-core/state/api/params"
    16  	"launchpad.net/juju-core/state/watcher"
    17  )
    18  
// logger is the package-wide logger. Note that it is registered under the
// name "juju.worker.instanceupdater" although the package itself is named
// instancepoller.
var logger = loggo.GetLogger("juju.worker.instanceupdater")
    20  
// ShortPoll and LongPoll hold the polling intervals for the instance
// updater. While a machine has no address, no instance status, or is not
// started, its poll interval starts from ShortPoll and is multiplied by
// ShortPollBackoff after every such poll for as long as it is still below
// LongPoll. The multiplication happens before the first wait, and the
// last step may overshoot LongPoll, so LongPoll is only an approximate
// maximum.
//
// When a machine has an address and is started, LongPoll is used to
// check that the instance address or status has not changed.
var (
	ShortPoll        = 1 * time.Second
	ShortPollBackoff = 2.0
	LongPoll         = 15 * time.Minute
)
    33  
// machine is the set of machine operations the instance poller relies on.
// It exists so the polling logic does not depend directly on
// *state.Machine, which is asserted to satisfy it elsewhere in this file.
type machine interface {
	// Id returns the machine id; it keys the updater's machine map and
	// appears in log messages.
	Id() string
	// InstanceId returns the provider instance id. The poller tolerates
	// an error for which state.IsNotProvisionedError reports true.
	InstanceId() (instance.Id, error)
	// Addresses returns the addresses currently recorded for the machine.
	Addresses() []instance.Address
	// SetAddresses records a new set of addresses for the machine.
	SetAddresses([]instance.Address) error
	// InstanceStatus returns the instance status currently recorded.
	InstanceStatus() (string, error)
	// SetInstanceStatus records a new instance status.
	SetInstanceStatus(status string) error
	// String is used when formatting the machine in error logs.
	String() string
	// Refresh is called after a change notification, before Life is
	// consulted.
	Refresh() error
	// Life is compared against state.Dead to decide when to stop polling.
	Life() state.Life
	// Status returns the machine status; only the status value is used
	// by the poller (compared against params.StatusStarted).
	Status() (status params.Status, info string, data params.StatusData, err error)
}
    46  
// instanceInfo holds what a single poll learned about an instance:
// its addresses and its status string.
type instanceInfo struct {
	// addresses is the address list reported for the instance.
	addresses []instance.Address
	// status is the reported instance status; the empty string is treated
	// by the poll loop as "no status yet".
	status string
}
    51  
// machineContext is what a per-machine goroutine needs from its
// surroundings.
type machineContext interface {
	// killAll is called with the error when a machine's poll loop fails.
	killAll(err error)
	// instanceInfo returns the addresses and status for the given
	// instance id. An error for which errors.IsNotImplementedError reports
	// true makes the poll loop effectively stop polling.
	instanceInfo(id instance.Id) (instanceInfo, error)
	// dying returns a channel; once a receive on it succeeds the poll
	// loop returns without error.
	dying() <-chan struct{}
}
    57  
// machineAddress pairs a machine with a list of addresses.
// NOTE(review): nothing in this part of the file references this type;
// confirm whether it is still used elsewhere in the package.
type machineAddress struct {
	machine   machine
	addresses []instance.Address
}
    62  
// Compile-time check that *state.Machine satisfies the machine interface.
var _ machine = (*state.Machine)(nil)
    64  
// machinesWatcher is the watcher consumed by watchMachinesLoop.
type machinesWatcher interface {
	// Changes delivers batches of machine ids. When the channel is
	// closed, watchMachinesLoop returns the result of watcher.MustErr on
	// this watcher.
	Changes() <-chan []string
	// Err is not called directly in this file.
	// NOTE(review): presumably it is what watcher.MustErr consults — confirm.
	Err() error
	// Stop is called exactly once when watchMachinesLoop exits.
	Stop() error
}
    70  
// updaterContext is what the central watch loop needs from its
// surroundings.
type updaterContext interface {
	// newMachineContext returns the context handed to a newly started
	// machine goroutine.
	newMachineContext() machineContext
	// getMachine looks up a machine by id. An error for which
	// errors.IsNotFoundError reports true is logged and skipped by the
	// caller; any other error stops the watch loop.
	getMachine(id string) (machine, error)
	// dying returns a channel; once a receive on it succeeds the watch
	// loop returns without error.
	dying() <-chan struct{}
}
    76  
// updater holds the state of the central watch loop.
type updater struct {
	context updaterContext
	// machines maps a machine id to the channel used to tell that
	// machine's goroutine that the machine has changed. An entry exists
	// for as long as the goroutine has not reported its exit.
	machines map[string]chan struct{}
	// machineDead is where machine goroutines send their machine when
	// they exit.
	machineDead chan machine
}
    82  
    83  // watchMachinesLoop watches for changes provided by the given
    84  // machinesWatcher and starts machine goroutines to deal
    85  // with them, using the provided newMachineContext
    86  // function to create the appropriate context for each new machine id.
    87  func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) {
    88  	p := &updater{
    89  		context:     context,
    90  		machines:    make(map[string]chan struct{}),
    91  		machineDead: make(chan machine),
    92  	}
    93  	defer func() {
    94  		if stopErr := w.Stop(); stopErr != nil {
    95  			if err == nil {
    96  				err = fmt.Errorf("error stopping watcher: %v", stopErr)
    97  			} else {
    98  				logger.Warningf("ignoring error when stopping watcher: %v", stopErr)
    99  			}
   100  		}
   101  		for len(p.machines) > 0 {
   102  			delete(p.machines, (<-p.machineDead).Id())
   103  		}
   104  	}()
   105  	for {
   106  		select {
   107  		case ids, ok := <-w.Changes():
   108  			if !ok {
   109  				return watcher.MustErr(w)
   110  			}
   111  			if err := p.startMachines(ids); err != nil {
   112  				return err
   113  			}
   114  		case m := <-p.machineDead:
   115  			delete(p.machines, m.Id())
   116  		case <-p.context.dying():
   117  			return nil
   118  		}
   119  	}
   120  }
   121  
   122  func (p *updater) startMachines(ids []string) error {
   123  	for _, id := range ids {
   124  		if c := p.machines[id]; c == nil {
   125  			// We don't know about the machine - start
   126  			// a goroutine to deal with it.
   127  			m, err := p.context.getMachine(id)
   128  			if errors.IsNotFoundError(err) {
   129  				logger.Warningf("watcher gave notification of non-existent machine %q", id)
   130  				continue
   131  			}
   132  			if err != nil {
   133  				return err
   134  			}
   135  			c = make(chan struct{})
   136  			p.machines[id] = c
   137  			go runMachine(p.context.newMachineContext(), m, c, p.machineDead)
   138  		} else {
   139  			c <- struct{}{}
   140  		}
   141  	}
   142  	return nil
   143  }
   144  
   145  // runMachine processes the address and status publishing for a given machine.
   146  // We assume that the machine is alive when this is first called.
   147  func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) {
   148  	defer func() {
   149  		// We can't just send on the died channel because the
   150  		// central loop might be trying to write to us on the
   151  		// changed channel.
   152  		for {
   153  			select {
   154  			case died <- m:
   155  				return
   156  			case <-changed:
   157  			}
   158  		}
   159  	}()
   160  	if err := machineLoop(context, m, changed); err != nil {
   161  		context.killAll(err)
   162  	}
   163  }
   164  
   165  func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
   166  	// Use a short poll interval when initially waiting for
   167  	// a machine's address and machine agent to start, and a long one when it already
   168  	// has an address and the machine agent is started.
   169  	pollInterval := ShortPoll
   170  	pollInstance := true
   171  	for {
   172  		if pollInstance {
   173  			instInfo, err := pollInstanceInfo(context, m)
   174  			if err != nil {
   175  				// If the provider doesn't implement Addresses/Status now,
   176  				// it never will until we're upgraded, so don't bother
   177  				// asking any more. We could use less resources
   178  				// by taking down the entire worker, but this is easier for now
   179  				// (and hopefully the local provider will implement
   180  				// Addresses/Status in the not-too-distant future),
   181  				// so we won't need to worry about this case at all.
   182  				if errors.IsNotImplementedError(err) {
   183  					pollInterval = 365 * 24 * time.Hour
   184  				} else {
   185  					return err
   186  				}
   187  			}
   188  			machineStatus, _, _, err := m.Status()
   189  			if err != nil {
   190  				logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
   191  			}
   192  			if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted {
   193  				// We've got at least one address and a status and instance is started, so poll infrequently.
   194  				pollInterval = LongPoll
   195  			} else if pollInterval < LongPoll {
   196  				// We have no addresses or not started - poll increasingly rarely
   197  				// until we do.
   198  				pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
   199  			}
   200  			pollInstance = false
   201  		}
   202  		select {
   203  		case <-time.After(pollInterval):
   204  			pollInstance = true
   205  		case <-context.dying():
   206  			return nil
   207  		case <-changed:
   208  			if err := m.Refresh(); err != nil {
   209  				return err
   210  			}
   211  			if m.Life() == state.Dead {
   212  				return nil
   213  			}
   214  		}
   215  	}
   216  }
   217  
   218  // pollInstanceInfo checks the current provider addresses and status
   219  // for the given machine's instance, and sets them on the machine if they've changed.
   220  func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) {
   221  	instInfo = instanceInfo{}
   222  	instId, err := m.InstanceId()
   223  	if err != nil && !state.IsNotProvisionedError(err) {
   224  		return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err)
   225  	}
   226  	instInfo, err = context.instanceInfo(instId)
   227  	if err != nil {
   228  		if errors.IsNotImplementedError(err) {
   229  			return instInfo, err
   230  		}
   231  		logger.Warningf("cannot get instance info for instance %q: %v", instId, err)
   232  		return instInfo, nil
   233  	}
   234  	currentInstStatus, err := m.InstanceStatus()
   235  	if err != nil {
   236  		// This should never occur since the machine is provisioned.
   237  		// But just in case, we reset polled status so we try again next time.
   238  		logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err)
   239  		instInfo.status = ""
   240  	} else {
   241  		if instInfo.status != currentInstStatus {
   242  			logger.Infof("machine %q has new instance status: %v", m.Id(), instInfo.status)
   243  			if err = m.SetInstanceStatus(instInfo.status); err != nil {
   244  				logger.Errorf("cannot set instance status on %q: %v", m, err)
   245  			}
   246  		}
   247  	}
   248  	if !addressesEqual(m.Addresses(), instInfo.addresses) {
   249  		logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses)
   250  		if err = m.SetAddresses(instInfo.addresses); err != nil {
   251  			logger.Errorf("cannot set addresses on %q: %v", m, err)
   252  		}
   253  	}
   254  	return instInfo, err
   255  }
   256  
   257  func addressesEqual(a0, a1 []instance.Address) bool {
   258  	if len(a0) != len(a1) {
   259  		return false
   260  	}
   261  	for i := range a0 {
   262  		if a0[i] != a1[i] {
   263  			return false
   264  		}
   265  	}
   266  	return true
   267  }