github.com/Cloud-Foundations/Dominator@v0.3.4/cmd/hyper-control/rolloutImage.go

package main

import (
	"fmt"
	"io"
	"math"
	"net"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"sync"
	"syscall"
	"time"

	imageclient "github.com/Cloud-Foundations/Dominator/imageserver/client"
	"github.com/Cloud-Foundations/Dominator/lib/concurrent"
	"github.com/Cloud-Foundations/Dominator/lib/constants"
	"github.com/Cloud-Foundations/Dominator/lib/cpusharer"
	"github.com/Cloud-Foundations/Dominator/lib/errors"
	"github.com/Cloud-Foundations/Dominator/lib/format"
	"github.com/Cloud-Foundations/Dominator/lib/json"
	"github.com/Cloud-Foundations/Dominator/lib/log"
	"github.com/Cloud-Foundations/Dominator/lib/log/prefixlogger"
	libnet "github.com/Cloud-Foundations/Dominator/lib/net"
	"github.com/Cloud-Foundations/Dominator/lib/rpcclientpool"
	"github.com/Cloud-Foundations/Dominator/lib/srpc"
	"github.com/Cloud-Foundations/Dominator/lib/tags"
	fm_proto "github.com/Cloud-Foundations/Dominator/proto/fleetmanager"
	hyper_proto "github.com/Cloud-Foundations/Dominator/proto/hypervisor"
	sub_proto "github.com/Cloud-Foundations/Dominator/proto/sub"
	subclient "github.com/Cloud-Foundations/Dominator/sub/client"
	"github.com/Cloud-Foundations/tricorder/go/tricorder/messages"
)

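// filePerms is rw-r--r-- (0644), used when writing tags.json.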
const (
	filePerms = syscall.S_IRUSR | syscall.S_IWUSR | syscall.S_IRGRP |
		syscall.S_IROTH
)

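// hypervisorType holds the per-Hypervisor state for a rollout: client
// resources for the Hypervisor, its sub and its health agent, the tags the
// machine started with and the health checks that were already failing
// before the upgrade.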
type hypervisorType struct {
	alreadyUpdated            bool
	healthAgentClientResource *rpcclientpool.ClientResource
	hostname                  string
	hypervisorClientResource  *srpc.ClientResource
	initialTags               tags.Tags
	initialUnhealthyList      map[string]struct{}
	logger                    log.DebugLogger
	noVMs                     bool
	subClientResource         *srpc.ClientResource
}

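// rolloutImageSubcommand is the entry point for the rollout-image
// subcommand. The first argument is the name of the image to roll out.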
func rolloutImageSubcommand(args []string, logger log.DebugLogger) error {
	err := rolloutImage(args[0], logger)
	if err != nil {
		return fmt.Errorf("error rolling out image: %s", err)
	}
	return nil
}

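// checkCertificates returns an error if any client certificate would expire
// before the predicted completion time of the rollout.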
func checkCertificates(predictedDuration time.Duration) error {
	predictedFinish := time.Now().Add(predictedDuration)
	if srpc.GetEarliestClientCertExpiration().Before(predictedFinish) {
		return fmt.Errorf(
			"a client certificate expires before the predicted completion time: %s",
			predictedFinish)
	}
	return nil
}

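// extendImageLifetime pushes back the expiration time of an unreleased image
// so that it cannot expire during the rollout. It is a no-op for images
// which never expire or which already live long enough.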
func extendImageLifetime(imageServerClientResource *srpc.ClientResource,
	imageName string, expiresAt time.Time, predictedDuration time.Duration,
	logger log.DebugLogger) error {
	if expiresAt.IsZero() {
		return nil
	}
	if time.Until(expiresAt) >= predictedDuration {
		return nil
	}
	newExpiration := time.Now().Add(predictedDuration)
	logger.Debugf(0, "extending image lifetime by %s\n",
		format.Duration(time.Until(newExpiration)))
	client, err := imageServerClientResource.GetHTTP(nil, 0)
	if err != nil {
		return err
	}
	defer client.Put()
	return imageclient.ChangeImageExpiration(client, imageName, newExpiration)
}

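// gitCommand runs git with the specified arguments in repositoryDirectory
// and returns its standard output. Standard error is passed through.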
func gitCommand(repositoryDirectory string, command ...string) ([]byte, error) {
	cmd := exec.Command("git", command...)
	cmd.Dir = repositoryDirectory
	cmd.Stderr = os.Stderr
	if output, err := cmd.Output(); err != nil {
		return nil, fmt.Errorf("error running git %v: %s", cmd.Args, err)
	} else {
		return output, nil
	}
}

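// rolloutImage performs the full rollout: it syncs the topology Git
// repository (if configured), checks the image, finds the connected
// Hypervisors in the location, tags them with PlannedImage, upgrades the
// unused (VM-free) Hypervisors and then the used ones with limited
// concurrency, releases the image and finally commits the new RequiredImage
// tag back to the topology repository.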
func rolloutImage(imageName string, logger log.DebugLogger) error {
	startTime := time.Now()
	cpuSharer := cpusharer.NewFifoCpuSharer()
	if *topologyDir != "" {
		logger.Debugln(0, "updating Git repository")
		stdout, err := gitCommand(*topologyDir, "status", "--porcelain")
		if err != nil {
			return err
		}
		if len(stdout) > 0 {
			return errors.New("Git repository is not clean")
		}
		if _, err := gitCommand(*topologyDir, "pull"); err != nil {
			return err
		}
	}
	logger.Debugln(0, "checking image")
	imageServerClientResource := srpc.NewClientResource("tcp",
		fmt.Sprintf("%s:%d", *imageServerHostname, *imageServerPortNum))
	defer imageServerClientResource.ScheduleClose()
	expiresAt, err := checkImage(imageServerClientResource, imageName)
	if err != nil {
		return err
	}
	fleetManagerClientResource := srpc.NewClientResource("tcp",
		fmt.Sprintf("%s:%d", *fleetManagerHostname, *fleetManagerPortNum))
	defer fleetManagerClientResource.ScheduleClose()
	logger.Debugln(0, "finding good Hypervisors")
	hypervisorAddresses, err := listConnectedHypervisors(
		fleetManagerClientResource)
	if err != nil {
		return err
	}
	hypervisors := make([]*hypervisorType, 0, len(hypervisorAddresses))
	// Wrap in a closure so that the hypervisors appended below are closed,
	// not the empty slice that exists now.
	defer func() { closeHypervisors(hypervisors) }()
	tagsForHypervisors, err := getTagsForHypervisors(fleetManagerClientResource)
	if err != nil {
		return fmt.Errorf("failure getting tags: %s", err)
	}
	logger.Debugln(0, "checking and tagging Hypervisors")
	hypervisorsChannel := make(chan *hypervisorType, len(hypervisorAddresses))
	for _, address := range hypervisorAddresses {
		if hostname, _, err := net.SplitHostPort(address); err != nil {
			return err
		} else {
			go func(hostname string) {
				cpuSharer.GrabCpu()
				defer cpuSharer.ReleaseCpu()
				hypervisor := setupHypervisor(hostname, imageName,
					tagsForHypervisors[hostname], cpuSharer, logger)
				hypervisorsChannel <- hypervisor
			}(hostname)
		}
	}
	numAlreadyUpdated := 0
	for range hypervisorAddresses {
		if hypervisor := <-hypervisorsChannel; hypervisor != nil {
			if hypervisor.alreadyUpdated {
				numAlreadyUpdated++
				continue
			}
			err := hypervisor.updateTagForHypervisor(
				fleetManagerClientResource, "PlannedImage", imageName)
			if err != nil {
				return fmt.Errorf("%s: failure updating tags: %s",
					hypervisor.hostname, err)
			}
			hypervisors = append(hypervisors, hypervisor)
		}
	}
	if numAlreadyUpdated == len(hypervisorAddresses) {
		return releaseImage(imageServerClientResource, imageName, expiresAt,
			logger)
	}
	if len(hypervisors) < 1 {
		return errors.New("no hypervisors to update")
	}
	logger.Debugln(0, "splitting unused/used Hypervisors")
	unusedHypervisors, usedHypervisors := markUnusedHypervisors(hypervisors,
		cpuSharer)
	logger.Debugf(0, "%d unused, %d used Hypervisors\n",
		len(unusedHypervisors), len(usedHypervisors))
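	// With concurrency increasing linearly (1, 2, 3, ...), k steps cover
	// 1+2+...+k = k(k+1)/2 Hypervisors, so upgrading n of them takes roughly
	// sqrt(2n) steps. Allow five minutes per step.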
	numSteps := math.Sqrt(float64(len(unusedHypervisors)*2)) +
		math.Sqrt(float64(len(usedHypervisors)*2))
	predictedDuration := time.Minute * 5 * time.Duration(numSteps)
	if err := checkCertificates(predictedDuration); err != nil {
		return err
	}
	err = extendImageLifetime(imageServerClientResource, imageName, expiresAt,
		predictedDuration, logger)
	if err != nil {
		return err
	}
	logger.Debugln(0, "upgrading unused Hypervisors")
	err = upgradeOneThenAll(fleetManagerClientResource, imageName,
		unusedHypervisors, cpuSharer, uint(len(unusedHypervisors)))
	if err != nil {
		return err
	}
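	// Choose the concurrency for the used Hypervisors: up to half of them,
	// but only one at a time when that would exceed the number of unused
	// Hypervisors which were just upgraded.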
	numConcurrent := uint(len(usedHypervisors) / 2)
	if numConcurrent < 1 {
		numConcurrent = 1
	} else if numConcurrent > uint(len(unusedHypervisors)) {
		numConcurrent = 1
	} else if numConcurrent*10 < uint(len(usedHypervisors)) {
		numConcurrent++
	}
	logger.Debugln(0, "upgrading used Hypervisors")
	err = upgradeOneThenAll(fleetManagerClientResource, imageName,
		usedHypervisors, cpuSharer, numConcurrent)
	if err != nil {
		return err
	}
	err = releaseImage(imageServerClientResource, imageName, expiresAt, logger)
	if err != nil {
		return err
	}
	if *topologyDir != "" {
		var tgs tags.Tags
		tagsFilename := filepath.Join(*topologyDir, *location, "tags.json")
		if err := json.ReadFromFile(tagsFilename, &tgs); err != nil {
			if !os.IsNotExist(err) {
				return err
			}
			tgs = make(tags.Tags)
		}
		oldImageName := tgs["RequiredImage"]
		tgs["RequiredImage"] = imageName
		delete(tgs, "PlannedImage")
		err := json.WriteToFile(tagsFilename, filePerms, "    ", tgs)
		if err != nil {
			return err
		}
		if _, err := gitCommand(*topologyDir, "add", tagsFilename); err != nil {
			return err
		}
		var locationInsert string
		if *location != "" {
			locationInsert = "in " + *location + " "
		}
		// locationInsert is either empty or ends with a space, hence the
		// "%sfrom" format.
		_, err = gitCommand(*topologyDir, "commit", "-m",
			fmt.Sprintf("Upgrade %sfrom %s to %s",
				locationInsert, oldImageName, imageName))
		if err != nil {
			return err
		}
		if _, err := gitCommand(*topologyDir, "push"); err != nil {
			return err
		}
	}
	logger.Printf("rollout completed in %s\n",
		format.Duration(time.Since(startTime)))
	return nil
}

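// checkImage returns the expiration time of the image (zero if the image has
// already been released). For an unreleased image it also re-writes the
// current expiration time, verifying up front that the expiration can be
// changed later in the rollout.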
func checkImage(imageServerClientResource *srpc.ClientResource,
	imageName string) (time.Time, error) {
	client, err := imageServerClientResource.GetHTTP(nil, 0)
	if err != nil {
		return time.Time{}, err
	}
	defer client.Put()
	expiresAt, err := imageclient.GetImageExpiration(client, imageName)
	if err != nil {
		return time.Time{}, err
	}
	if expiresAt.IsZero() {
		return expiresAt, nil
	}
	return expiresAt,
		imageclient.ChangeImageExpiration(client, imageName, expiresAt)
}

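// closeHypervisors schedules the Hypervisor and sub client resources of each
// hypervisor to be closed.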
func closeHypervisors(hypervisors []*hypervisorType) {
	for _, hypervisor := range hypervisors {
		hypervisor.hypervisorClientResource.ScheduleClose()
		hypervisor.subClientResource.ScheduleClose()
	}
}

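// getTagsForHypervisors reads a single update from the Fleet Manager and
// returns a map from machine hostname to tags. The connection is closed
// rather than recycled because the GetUpdates stream is abandoned after the
// first update.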
func getTagsForHypervisors(clientResource *srpc.ClientResource) (
	map[string]tags.Tags, error) {
	client, err := clientResource.GetHTTP(nil, 0)
	if err != nil {
		return nil, err
	}
	defer client.Close()
	conn, err := client.Call("FleetManager.GetUpdates")
	if err != nil {
		return nil, err
	}
	defer conn.Close()
	request := fm_proto.GetUpdatesRequest{Location: *location, MaxUpdates: 1}
	if err := conn.Encode(request); err != nil {
		return nil, err
	}
	if err := conn.Flush(); err != nil {
		return nil, err
	}
	var reply fm_proto.Update
	if err := conn.Decode(&reply); err != nil {
		return nil, err
	}
	if err := errors.New(reply.Error); err != nil {
		return nil, err
	}
	tagsForHypervisors := make(map[string]tags.Tags, len(reply.ChangedMachines))
	for _, machine := range reply.ChangedMachines {
		tagsForHypervisors[machine.Hostname] = machine.Tags
	}
	return tagsForHypervisors, nil
}

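// listConnectedHypervisors returns the addresses of the Hypervisors
// (including unhealthy ones) in the configured location.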
func listConnectedHypervisors(clientResource *srpc.ClientResource) (
	[]string, error) {
	return listConnectedHypervisorsInLocation(clientResource, *location)
}

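// listConnectedHypervisorsInLocation asks the Fleet Manager for the
// Hypervisors in the specified location.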
func listConnectedHypervisorsInLocation(clientResource *srpc.ClientResource,
	location string) ([]string, error) {
	client, err := clientResource.GetHTTP(nil, 0)
	if err != nil {
		return nil, err
	}
	defer client.Put()
	request := fm_proto.ListHypervisorsInLocationRequest{
		IncludeUnhealthy: true,
		Location:         location,
	}
	var reply fm_proto.ListHypervisorsInLocationResponse
	err = client.RequestReply("FleetManager.ListHypervisorsInLocation",
		request, &reply)
	if err != nil {
		return nil, err
	}
	if err := errors.New(reply.Error); err != nil {
		return nil, err
	}
	return reply.HypervisorAddresses, nil
}

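// markUnusedHypervisors concurrently lists the VMs on each Hypervisor
// (ignoring VMs which have stopped, failed to start or are being destroyed)
// and partitions the hypervisors into those with no VMs and those with at
// least one.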
func markUnusedHypervisors(hypervisors []*hypervisorType,
	cpuSharer cpusharer.CpuSharer) (
	map[*hypervisorType]struct{}, map[*hypervisorType]struct{}) {
	dialer := libnet.NewCpuSharingDialer(&net.Dialer{}, cpuSharer)
	waitGroup := &sync.WaitGroup{}
	for _, hypervisor_ := range hypervisors {
		waitGroup.Add(1)
		go func(h *hypervisorType) {
			defer waitGroup.Done()
			cpuSharer.GrabCpu()
			defer cpuSharer.ReleaseCpu()
			client, err := h.hypervisorClientResource.GetHTTPWithDialer(nil,
				dialer)
			if err != nil {
				h.logger.Printf("error connecting to hypervisor: %s\n", err)
				return
			}
			defer client.Put()
			request := hyper_proto.ListVMsRequest{
				IgnoreStateMask: 1<<hyper_proto.StateFailedToStart |
					1<<hyper_proto.StateStopping |
					1<<hyper_proto.StateStopped |
					1<<hyper_proto.StateDestroying,
			}
			var reply hyper_proto.ListVMsResponse
			err = client.RequestReply("Hypervisor.ListVMs", request, &reply)
			if err != nil {
				h.logger.Printf("error listing VMs: %s\n", err)
				return
			}
			if len(reply.IpAddresses) < 1 {
				h.noVMs = true
			}
		}(hypervisor_)
	}
	waitGroup.Wait()
	unusedHypervisors := make(map[*hypervisorType]struct{})
	usedHypervisors := make(map[*hypervisorType]struct{})
	for _, hypervisor := range hypervisors {
		if hypervisor.noVMs {
			unusedHypervisors[hypervisor] = struct{}{}
		} else {
			usedHypervisors[hypervisor] = struct{}{}
		}
	}
	return unusedHypervisors, usedHypervisors
}

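// releaseImage clears the expiration time of the image, making it permanent.
// It is a no-op if the image was already released.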
func releaseImage(imageServerClientResource *srpc.ClientResource,
	imageName string, expiresAt time.Time, logger log.DebugLogger) error {
	if expiresAt.IsZero() {
		logger.Debugln(1, "image already released")
		return nil
	}
	logger.Debugln(0, "releasing image")
	client, err := imageServerClientResource.GetHTTP(nil, 0)
	if err != nil {
		return err
	}
	defer client.Put()
	return imageclient.ChangeImageExpiration(client, imageName, time.Time{})
}

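// setupHypervisor creates the client resources for a Hypervisor and decides
// whether it should take part in the rollout. It returns nil if the machine
// is pinned to a different image stream or cannot be polled, and marks the
// hypervisor as alreadyUpdated if it is already running the target image.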
func setupHypervisor(hostname string, imageName string, tgs tags.Tags,
	cpuSharer *cpusharer.FifoCpuSharer,
	logger log.DebugLogger) *hypervisorType {
	logger = prefixlogger.New(hostname+": ", logger)
	currentRequiredImage := tgs["RequiredImage"]
	if currentRequiredImage != "" &&
		path.Dir(currentRequiredImage) != path.Dir(imageName) {
		logger.Printf(
			"image stream: current=%s != new=%s, skipping\n",
			path.Dir(currentRequiredImage), path.Dir(imageName))
		return nil
	}
	h := &hypervisorType{
		healthAgentClientResource: rpcclientpool.New("tcp",
			fmt.Sprintf("%s:%d", hostname, 6910), true, ""), // health agent port
		hostname: hostname,
		hypervisorClientResource: srpc.NewClientResource("tcp",
			fmt.Sprintf("%s:%d", hostname,
				constants.HypervisorPortNumber)),
		initialTags:          tgs,
		initialUnhealthyList: make(map[string]struct{}),
		logger:               logger,
		subClientResource: srpc.NewClientResource("tcp",
			fmt.Sprintf("%s:%d", hostname, constants.SubPortNumber)),
	}
	if lastImage, err := h.getLastImageName(cpuSharer); err != nil {
		logger.Printf("skipping: %s\n", err)
		return nil
	} else if lastImage == imageName {
		logger.Println("already updated, skipping")
		h.alreadyUpdated = true
		return h
	} else {
		return h
	}
}

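// upgradeOneThenAll upgrades one Hypervisor first and then the remainder,
// with the concurrency ramping up linearly to maxConcurrent, limiting the
// blast radius of a bad image.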
func upgradeOneThenAll(fleetManagerClientResource *srpc.ClientResource,
	imageName string, hypervisors map[*hypervisorType]struct{},
	cpuSharer *cpusharer.FifoCpuSharer, maxConcurrent uint) error {
	if len(hypervisors) < 1 {
		return nil
	}
	state := concurrent.NewStateWithLinearConcurrencyIncrease(1, maxConcurrent)
	for hypervisor := range hypervisors {
		hypervisor := hypervisor // capture the range variable for the closure
		err := state.GoRun(func() error {
			err := hypervisor.upgrade(fleetManagerClientResource, imageName,
				cpuSharer)
			if err != nil {
				return fmt.Errorf("error upgrading: %s: %s",
					hypervisor.hostname, err)
			}
			return nil
		})
		if err != nil {
			return err
		}
	}
	return state.Reap()
}

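// getFailingHealthChecks polls the health agent until it responds or the
// timeout expires, returning the list of failing health checks and the
// timestamp of the metric.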
func (h *hypervisorType) getFailingHealthChecks(
	cpuSharer *cpusharer.FifoCpuSharer,
	timeout time.Duration) ([]string, time.Time, error) {
	stopTime := time.Now().Add(timeout)
	for ; time.Until(stopTime) >= 0; cpuSharer.Sleep(time.Second) {
		if list, timestamp, err := h.getFailingHealthChecksOnce(); err == nil {
			return list, timestamp, nil
		}
	}
	return nil, time.Time{}, errors.New("timed out getting health status")
}

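// getFailingHealthChecksOnce reads the unhealthy-list metric from the health
// agent once.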
func (h *hypervisorType) getFailingHealthChecksOnce() (
	[]string, time.Time, error) {
	client, err := h.healthAgentClientResource.Get(nil)
	if err != nil {
		return nil, time.Time{}, err
	}
	defer client.Put()
	var metric messages.Metric
	err = client.Call("MetricsServer.GetMetric",
		"/health-checks/*/unhealthy-list", &metric)
	if err != nil {
		client.Close()
		return nil, time.Time{}, err
	}
	if list, ok := metric.Value.([]string); !ok {
		return nil, time.Time{}, errors.New("list metric is not []string")
	} else {
		if timestamp, ok := metric.TimeStamp.(time.Time); ok {
			return list, timestamp, nil
		} else {
			return list, time.Time{}, nil
		}
	}
}

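// getLastImageName polls the sub on the Hypervisor and returns the name of
// the last image which was successfully pushed to it.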
func (h *hypervisorType) getLastImageName(cpuSharer *cpusharer.FifoCpuSharer) (
	string, error) {
	client, err := h.subClientResource.GetHTTP(nil, time.Second*15)
	if err != nil {
		return "", fmt.Errorf("error connecting to sub: %s", err)
	}
	defer client.Put()
	request := sub_proto.PollRequest{ShortPollOnly: true}
	var reply sub_proto.PollResponse
	if err := subclient.CallPoll(client, request, &reply); err != nil {
		client.Close()
		// An io.EOF is not treated as an error: whatever was decoded into
		// the reply is used.
		if err != io.EOF {
			return "", fmt.Errorf("error polling sub: %s", err)
		}
	}
	return reply.LastSuccessfulImageName, nil
}

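// updateTagForHypervisor sets the specified tag on the machine via the Fleet
// Manager. Setting RequiredImage also removes any PlannedImage tag. No RPC
// is made if the tags would be unchanged.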
func (h *hypervisorType) updateTagForHypervisor(
	clientResource *srpc.ClientResource, key, value string) error {
	newTags := h.initialTags.Copy()
	newTags[key] = value
	if key == "RequiredImage" {
		delete(newTags, "PlannedImage")
	}
	if h.initialTags.Equal(newTags) {
		return nil
	}
	client, err := clientResource.GetHTTP(nil, 0)
	if err != nil {
		return err
	}
	defer client.Put()
	request := fm_proto.ChangeMachineTagsRequest{
		Hostname: h.hostname,
		Tags:     newTags,
	}
	var reply fm_proto.ChangeMachineTagsResponse
	err = client.RequestReply("FleetManager.ChangeMachineTags",
		request, &reply)
	if err != nil {
		return err
	}
	return errors.New(reply.Error)
}

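// upgrade records the health checks which were already failing, tags the
// Hypervisor with the new RequiredImage, waits up to 15 minutes for the sub
// to report the image as installed and then verifies that no new health
// checks have started failing. A Hypervisor whose health agent cannot be
// queried at the start is skipped without failing the rollout.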
func (h *hypervisorType) upgrade(clientResource *srpc.ClientResource,
	imageName string, cpuSharer *cpusharer.FifoCpuSharer) error {
	cpuSharer.GrabCpu()
	defer cpuSharer.ReleaseCpu()
	list, _, err := h.getFailingHealthChecks(cpuSharer, time.Second)
	if err != nil {
		h.logger.Println(err)
		return nil
	} else if len(list) > 0 {
		for _, failed := range list {
			h.initialUnhealthyList[failed] = struct{}{}
		}
	}
	h.logger.Debugln(0, "upgrading")
	err = h.updateTagForHypervisor(clientResource, "RequiredImage", imageName)
	if err != nil {
		return err
	}
	stopTime := time.Now().Add(time.Minute * 15)
	updateCompleted := false
	var lastError string
	for ; time.Until(stopTime) > 0; cpuSharer.Sleep(time.Second) {
		if syncedImage, err := h.getLastImageName(cpuSharer); err != nil {
			if lastError != err.Error() {
				h.logger.Debugln(0, err)
			}
			lastError = err.Error()
			continue
		} else if syncedImage == imageName {
			updateCompleted = true
			break
		}
	}
	if !updateCompleted {
		return errors.New("timed out waiting for image update to complete")
	}
	h.logger.Debugln(0, "upgraded")
	cpuSharer.Sleep(time.Second * 15)
	list, _, err = h.getFailingHealthChecks(cpuSharer, time.Minute)
	if err != nil {
		return err
	} else {
		for _, entry := range list {
			if _, ok := h.initialUnhealthyList[entry]; !ok {
				return fmt.Errorf("health check failed: %s", entry)
			}
		}
	}
	h.logger.Debugln(0, "still healthy")
	return nil
}

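// waitLastImageName polls for up to one minute for the name of the last
// image successfully pushed to the Hypervisor.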
func (h *hypervisorType) waitLastImageName(cpuSharer *cpusharer.FifoCpuSharer) (
	string, error) {
	stopTime := time.Now().Add(time.Minute)
	for ; time.Until(stopTime) > 0; cpuSharer.Sleep(time.Second * 5) {
		imageName, err := h.getLastImageName(cpuSharer)
		if err != nil {
			h.logger.Debugln(0, err)
			continue
		}
		return imageName, nil
	}
	return "", errors.New("timed out getting last image name")
}