github.com/Cloud-Foundations/Dominator@v0.3.4/dom/herd/sub.go (about)

     1  package herd
     2  
     3  import (
     4  	"errors"
     5  	"flag"
     6  	"fmt"
     7  	"io"
     8  	"net"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/Cloud-Foundations/Dominator/dom/lib"
    13  	"github.com/Cloud-Foundations/Dominator/lib/constants"
    14  	filegenclient "github.com/Cloud-Foundations/Dominator/lib/filegen/client"
    15  	"github.com/Cloud-Foundations/Dominator/lib/filesystem"
    16  	"github.com/Cloud-Foundations/Dominator/lib/hash"
    17  	"github.com/Cloud-Foundations/Dominator/lib/image"
    18  	"github.com/Cloud-Foundations/Dominator/lib/objectcache"
    19  	"github.com/Cloud-Foundations/Dominator/lib/resourcepool"
    20  	"github.com/Cloud-Foundations/Dominator/lib/srpc"
    21  	subproto "github.com/Cloud-Foundations/Dominator/proto/sub"
    22  	"github.com/Cloud-Foundations/Dominator/sub/client"
    23  )
    24  
    25  var (
    26  	updateConfigurationsForSubs = flag.Bool("updateConfigurationsForSubs",
    27  		true, "If true, update the configurations for all subs")
    28  	logUnknownSubConnectErrors = flag.Bool("logUnknownSubConnectErrors", false,
    29  		"If true, log unknown sub connection errors")
    30  	showIP = flag.Bool("showIP", false,
    31  		"If true, prefer to show IP address from MDB if available")
    32  	useIP = flag.Bool("useIP", true,
    33  		"If true, prefer to use IP address from MDB if available")
    34  
    35  	subPortNumber = fmt.Sprintf(":%d", constants.SubPortNumber)
    36  	zeroHash      hash.Hash
    37  )
    38  
    39  func (sub *Sub) string() string {
    40  	if *showIP && sub.mdb.IpAddress != "" {
    41  		return sub.mdb.IpAddress
    42  	}
    43  	return sub.mdb.Hostname
    44  }
    45  
    46  func (sub *Sub) address() string {
    47  	if *useIP && sub.mdb.IpAddress != "" {
    48  		hostInstance := strings.SplitN(sub.mdb.Hostname, "*", 2)
    49  		if len(hostInstance) > 1 {
    50  			return sub.mdb.IpAddress + "*" + hostInstance[1] + subPortNumber
    51  		}
    52  		return sub.mdb.IpAddress + subPortNumber
    53  	}
    54  	return sub.mdb.Hostname + subPortNumber
    55  }
    56  
    57  // Returns true if the principal described by authInfo has administrative access
    58  // to the sub.
    59  func (sub *Sub) checkAdminAccess(authInfo *srpc.AuthInformation) bool {
    60  	if authInfo == nil {
    61  		return false
    62  	}
    63  	if authInfo.HaveMethodAccess {
    64  		return true
    65  	}
    66  	if sub.clientResource == nil {
    67  		return false
    68  	}
    69  	srpcClient, err := sub.clientResource.GetHTTPWithDialer(sub.cancelChannel,
    70  		sub.herd.dialer)
    71  	if err != nil {
    72  		return false
    73  	}
    74  	defer srpcClient.Put()
    75  	conf, err := client.GetConfiguration(srpcClient)
    76  	if err != nil {
    77  		return false
    78  	}
    79  	for _, group := range conf.OwnerGroups {
    80  		if _, ok := authInfo.GroupList[group]; ok {
    81  			return true
    82  		}
    83  	}
    84  	if authInfo.Username != "" {
    85  		for _, user := range conf.OwnerUsers {
    86  			if user == authInfo.Username {
    87  				return true
    88  			}
    89  		}
    90  	}
    91  	return false
    92  }
    93  
    94  func (sub *Sub) getComputedFiles(im *image.Image) []filegenclient.ComputedFile {
    95  	if im == nil {
    96  		return nil
    97  	}
    98  	numComputed := im.FileSystem.NumComputedRegularInodes()
    99  	if numComputed < 1 {
   100  		return nil
   101  	}
   102  	computedFiles := make([]filegenclient.ComputedFile, 0, numComputed)
   103  	inodeToFilenamesTable := im.FileSystem.InodeToFilenamesTable()
   104  	for inum, inode := range im.FileSystem.InodeTable {
   105  		if inode, ok := inode.(*filesystem.ComputedRegularInode); ok {
   106  			if filenames, ok := inodeToFilenamesTable[inum]; ok {
   107  				if len(filenames) == 1 {
   108  					computedFiles = append(computedFiles,
   109  						filegenclient.ComputedFile{filenames[0], inode.Source})
   110  				}
   111  			}
   112  		}
   113  	}
   114  	return computedFiles
   115  }
   116  
   117  func (sub *Sub) tryMakeBusy() bool {
   118  	sub.busyFlagMutex.Lock()
   119  	defer sub.busyFlagMutex.Unlock()
   120  	if sub.busy {
   121  		return false
   122  	}
   123  	sub.busyStartTime = time.Now()
   124  	sub.busy = true
   125  	return true
   126  }
   127  
   128  func (sub *Sub) makeUnbusy() {
   129  	sub.busyFlagMutex.Lock()
   130  	defer sub.busyFlagMutex.Unlock()
   131  	sub.busyStopTime = time.Now()
   132  	sub.busy = false
   133  }
   134  
   135  func (sub *Sub) connectAndPoll() {
   136  	sub.loadConfiguration()
   137  	if sub.processFileUpdates() {
   138  		sub.generationCount = 0 // Force a full poll.
   139  	}
   140  	sub.deletingFlagMutex.Lock()
   141  	if sub.deleting {
   142  		sub.deletingFlagMutex.Unlock()
   143  		return
   144  	}
   145  	if sub.clientResource == nil {
   146  		sub.clientResource = srpc.NewClientResource("tcp", sub.address())
   147  	}
   148  	sub.deletingFlagMutex.Unlock()
   149  	previousStatus := sub.status
   150  	sub.status = statusConnecting
   151  	timer := time.AfterFunc(time.Second, func() {
   152  		sub.publishedStatus = sub.status
   153  	})
   154  	defer func() {
   155  		timer.Stop()
   156  		sub.publishedStatus = sub.status
   157  		switch sub.status {
   158  		case statusUnknown:
   159  		case statusConnecting:
   160  		case statusDNSError:
   161  		case statusNoRouteToHost:
   162  		case statusConnectionRefused,
   163  			statusConnectTimeout,
   164  			statusFailedToConnect:
   165  			sub.herd.addSubToInstallerQueue(sub.mdb.Hostname)
   166  		default:
   167  			sub.herd.removeSubFromInstallerQueue(sub.mdb.Hostname)
   168  		}
   169  	}()
   170  	sub.lastConnectionStartTime = time.Now()
   171  	srpcClient, err := sub.clientResource.GetHTTPWithDialer(sub.cancelChannel,
   172  		sub.herd.dialer)
   173  	dialReturnedTime := time.Now()
   174  	if err != nil {
   175  		sub.isInsecure = false
   176  		sub.pollTime = time.Time{}
   177  		if err == resourcepool.ErrorResourceLimitExceeded {
   178  			return
   179  		}
   180  		if err, ok := err.(*net.OpError); ok {
   181  			if _, ok := err.Err.(*net.DNSError); ok {
   182  				sub.status = statusDNSError
   183  				return
   184  			}
   185  			if err.Timeout() {
   186  				sub.status = statusConnectTimeout
   187  				return
   188  			}
   189  		}
   190  		if err == srpc.ErrorConnectionRefused {
   191  			sub.status = statusConnectionRefused
   192  			return
   193  		}
   194  		if err == srpc.ErrorNoRouteToHost {
   195  			sub.status = statusNoRouteToHost
   196  			return
   197  		}
   198  		if err == srpc.ErrorMissingCertificate {
   199  			sub.lastReachableTime = dialReturnedTime
   200  			sub.status = statusMissingCertificate
   201  			return
   202  		}
   203  		if err == srpc.ErrorBadCertificate {
   204  			sub.lastReachableTime = dialReturnedTime
   205  			sub.status = statusBadCertificate
   206  			return
   207  		}
   208  		sub.status = statusFailedToConnect
   209  		if *logUnknownSubConnectErrors {
   210  			sub.herd.logger.Println(err)
   211  		}
   212  		return
   213  	}
   214  	defer srpcClient.Put()
   215  	if srpcClient.IsEncrypted() {
   216  		sub.isInsecure = false
   217  	} else {
   218  		sub.isInsecure = true
   219  	}
   220  	sub.lastReachableTime = dialReturnedTime
   221  	sub.lastConnectionSucceededTime = dialReturnedTime
   222  	sub.lastConnectDuration =
   223  		sub.lastConnectionSucceededTime.Sub(sub.lastConnectionStartTime)
   224  	connectDistribution.Add(sub.lastConnectDuration)
   225  	waitStartTime := time.Now()
   226  	sub.herd.cpuSharer.ReleaseCpu()
   227  	select {
   228  	case sub.herd.pollSemaphore <- struct{}{}:
   229  		sub.herd.cpuSharer.GrabCpu()
   230  		break
   231  	case <-sub.cancelChannel:
   232  		sub.herd.cpuSharer.GrabCpu()
   233  		return
   234  	}
   235  	pollWaitTimeDistribution.Add(time.Since(waitStartTime))
   236  	sub.status = statusPolling
   237  	sub.poll(srpcClient, previousStatus)
   238  	<-sub.herd.pollSemaphore
   239  }
   240  
   241  func (sub *Sub) loadConfiguration() {
   242  	// Get a stable copy of the configuration.
   243  	newRequiredImageName := sub.mdb.RequiredImage
   244  	if newRequiredImageName == "" {
   245  		newRequiredImageName = sub.herd.defaultImageName
   246  	}
   247  	if newRequiredImageName != sub.requiredImageName {
   248  		sub.computedInodes = nil
   249  	}
   250  	sub.herd.cpuSharer.ReleaseCpu()
   251  	defer sub.herd.cpuSharer.GrabCpu()
   252  	sub.requiredImageName = newRequiredImageName
   253  	sub.requiredImage = sub.herd.imageManager.GetNoError(sub.requiredImageName)
   254  	sub.plannedImageName = sub.mdb.PlannedImage
   255  	sub.plannedImage = sub.herd.imageManager.GetNoError(sub.plannedImageName)
   256  }
   257  
   258  func (sub *Sub) processFileUpdates() bool {
   259  	haveUpdates := false
   260  	for {
   261  		image := sub.requiredImage
   262  		if image != nil && sub.computedInodes == nil {
   263  			sub.computedInodes = make(map[string]*filesystem.RegularInode)
   264  			sub.deletingFlagMutex.Lock()
   265  			if sub.deleting {
   266  				sub.deletingFlagMutex.Unlock()
   267  				return false
   268  			}
   269  			computedFiles := sub.getComputedFiles(image)
   270  			sub.herd.cpuSharer.ReleaseCpu()
   271  			sub.herd.computedFilesManager.Update(
   272  				filegenclient.Machine{sub.mdb, computedFiles})
   273  			sub.herd.cpuSharer.GrabCpu()
   274  			sub.deletingFlagMutex.Unlock()
   275  		}
   276  		select {
   277  		case fileInfos := <-sub.fileUpdateChannel:
   278  			if image == nil {
   279  				continue
   280  			}
   281  			filenameToInodeTable := image.FileSystem.FilenameToInodeTable()
   282  			for _, fileInfo := range fileInfos {
   283  				if fileInfo.Hash == zeroHash {
   284  					continue // No object.
   285  				}
   286  				inum, ok := filenameToInodeTable[fileInfo.Pathname]
   287  				if !ok {
   288  					continue
   289  				}
   290  				genericInode, ok := image.FileSystem.InodeTable[inum]
   291  				if !ok {
   292  					continue
   293  				}
   294  				cInode, ok := genericInode.(*filesystem.ComputedRegularInode)
   295  				if !ok {
   296  					continue
   297  				}
   298  				rInode := &filesystem.RegularInode{
   299  					Mode:         cInode.Mode,
   300  					Uid:          cInode.Uid,
   301  					Gid:          cInode.Gid,
   302  					MtimeSeconds: -1, // The time is set during the compute.
   303  					Size:         fileInfo.Length,
   304  					Hash:         fileInfo.Hash,
   305  				}
   306  				sub.computedInodes[fileInfo.Pathname] = rInode
   307  				haveUpdates = true
   308  			}
   309  		default:
   310  			return haveUpdates
   311  		}
   312  	}
   313  }
   314  
// poll polls the sub over srpcClient and drives the per-sub state machine.
//
// previousStatus is the status the sub had before the current connection
// attempt. It is used to detect transitions (for example from updating to
// update finished) and to decide whether a full poll must be forced. A full
// poll is requested by resetting sub.generationCount to zero, which is sent as
// HaveGeneration in the poll request.
//
// After a successful poll the reply is recorded, the sub configuration is
// updated if needed, and the sub status is set from the reply. If the sub is
// idle and a file-system is available, missing objects for the required image
// are fetched or pushed, an update is sent if needed, objects for the planned
// image are fetched, and finally unused objects are cleaned up. On most exit
// paths the file-system and object cache are reclaimed to free memory.
func (sub *Sub) poll(srpcClient *srpc.Client, previousStatus subStatus) {
	// If the planned image has just become available, force a full poll.
	if previousStatus == statusSynced &&
		!sub.havePlannedImage &&
		sub.plannedImage != nil {
		sub.havePlannedImage = true
		sub.generationCount = 0 // Force a full poll.
	}
	// If the computed files have changed since the last sync, force a full poll
	if previousStatus == statusSynced &&
		sub.computedFilesChangeTime.After(sub.lastSyncTime) {
		sub.generationCount = 0 // Force a full poll.
	}
	// If the last update was disabled and updates are enabled now, force a full
	// poll.
	if previousStatus == statusUpdatesDisabled &&
		sub.herd.updatesDisabledReason == "" && !sub.mdb.DisableUpdates {
		sub.generationCount = 0 // Force a full poll.
	}
	// If the last update was disabled due to a safety check and there is a
	// pending SafetyClear, force a full poll to re-compute the update.
	if previousStatus == statusUnsafeUpdate && sub.pendingSafetyClear {
		sub.generationCount = 0 // Force a full poll.
	}
	// If the last update failed because disruption was not permitted and there
	// is a pending ForceDisruption, force a full poll to re-compute the update.
	if (previousStatus == statusDisruptionRequested ||
		previousStatus == statusDisruptionDenied) &&
		sub.pendingForceDisruptiveUpdate {
		sub.generationCount = 0 // Force a full poll.
	}
	var request subproto.PollRequest
	request.HaveGeneration = sub.generationCount
	var reply subproto.PollResponse
	haveImage := false
	if sub.requiredImage == nil && sub.plannedImage == nil {
		request.ShortPollOnly = true
		// Ensure a full poll when the image becomes available later. This will
		// cover the special case when an image expiration is extended, which
		// leads to the sub showing "image not ready" until the next generation
		// increment.
		sub.generationCount = 0
	} else {
		haveImage = true
	}
	logger := sub.herd.logger
	sub.lastPollStartTime = time.Now()
	if err := client.CallPoll(srpcClient, request, &reply); err != nil {
		srpcClient.Close()
		// On EOF return quietly: the status is left as set by the caller and
		// nothing is logged.
		if err == io.EOF {
			return
		}
		sub.pollTime = time.Time{}
		if err == srpc.ErrorAccessToMethodDenied {
			sub.status = statusPollDenied
		} else {
			sub.status = statusFailedToPoll
		}
		logger.Printf("Error calling %s.Poll(): %s\n", sub, err)
		return
	}
	// Record what the sub reported.
	sub.lastDisruptionState = reply.DisruptionState
	sub.lastPollSucceededTime = time.Now()
	sub.lastSuccessfulImageName = reply.LastSuccessfulImageName
	sub.lastNote = reply.LastNote
	sub.lastWriteError = reply.LastWriteError
	sub.systemUptime = reply.SystemUptime
	if reply.GenerationCount == 0 {
		sub.reclaim()
		sub.generationCount = 0
	}
	sub.lastScanDuration = reply.DurationOfLastScan
	if fs := reply.FileSystem; fs == nil {
		// Short poll: the reply carries no file-system.
		sub.lastPollWasFull = false
		sub.lastShortPollDuration =
			sub.lastPollSucceededTime.Sub(sub.lastPollStartTime)
		shortPollDistribution.Add(sub.lastShortPollDuration)
		if !sub.startTime.Equal(reply.StartTime) {
			sub.generationCount = 0 // Sub has restarted: force a full poll.
		}
		if sub.freeSpaceThreshold != nil && reply.FreeSpace != nil {
			if *reply.FreeSpace > *sub.freeSpaceThreshold {
				sub.generationCount = 0 // Force a full poll for next time.
			}
		}
	} else {
		// Full poll: keep the file-system and object cache from the reply.
		sub.lastPollWasFull = true
		sub.freeSpaceThreshold = nil
		if err := fs.RebuildInodePointers(); err != nil {
			sub.status = statusFailedToPoll
			logger.Printf("Error building pointers for: %s %s\n", sub, err)
			return
		}
		fs.BuildEntryMap()
		sub.fileSystem = fs
		sub.objectCache = reply.ObjectCache
		sub.generationCount = reply.GenerationCount
		sub.lastFullPollDuration =
			sub.lastPollSucceededTime.Sub(sub.lastPollStartTime)
		fullPollDistribution.Add(sub.lastFullPollDuration)
	}
	sub.startTime = reply.StartTime
	sub.pollTime = reply.PollTime
	sub.updateConfiguration(srpcClient, reply)
	// Derive the status from the reply. The order of these checks matters: the
	// first matching condition determines the status.
	if reply.FetchInProgress {
		sub.status = statusFetching
		return
	}
	if reply.UpdateInProgress {
		sub.status = statusUpdating
		return
	}
	if reply.LastWriteError != "" {
		sub.status = statusUnwritable
		sub.reclaim()
		return
	}
	if reply.GenerationCount < 1 {
		sub.status = statusSubNotReady
		return
	}
	if reply.LockedByAnotherClient {
		sub.status = statusLocked
		sub.reclaim()
		return
	}
	if previousStatus == statusLocked { // Not locked anymore, but was locked.
		if sub.fileSystem == nil {
			sub.generationCount = 0 // Force a full poll next cycle.
			return
		}
	}
	if previousStatus == statusFetching && reply.LastFetchError != "" {
		logger.Printf("Fetch failure for: %s: %s\n", sub, reply.LastFetchError)
		sub.status = statusFailedToFetch
		if sub.fileSystem == nil {
			sub.generationCount = 0 // Force a full poll next cycle.
			return
		}
	}
	if previousStatus == statusUpdating {
		// Transition from updating to update ended (may be partial/failed).
		switch reply.LastUpdateError {
		case "":
			sub.status = statusWaitingForNextFullPoll
		case subproto.ErrorDisruptionPending:
			sub.status = statusDisruptionRequested
		case subproto.ErrorDisruptionDenied:
			sub.status = statusDisruptionDenied
		default:
			logger.Printf("Update failure for: %s: %s\n",
				sub, reply.LastUpdateError)
			sub.status = statusFailedToUpdate
		}
		// Remember the scan count so that later polls can tell whether the sub
		// has scanned again since the update ended.
		sub.scanCountAtLastUpdateEnd = reply.ScanCount
		sub.reclaim()
		return
	}
	if sub.checkCancel() {
		// Configuration change pending: skip further processing. Do not reclaim
		// file-system and objectcache data: it will speed up the next Poll.
		return
	}
	if !haveImage {
		if sub.requiredImageName == "" {
			sub.status = statusImageUndefined
		} else {
			sub.status = statusImageNotReady
		}
		return
	}
	if previousStatus == statusFailedToUpdate ||
		previousStatus == statusWaitingForNextFullPoll {
		if sub.scanCountAtLastUpdateEnd == reply.ScanCount {
			// Need to wait until sub has performed a new scan.
			if sub.fileSystem != nil {
				sub.reclaim()
			}
			sub.status = previousStatus
			return
		}
		if sub.fileSystem == nil {
			// Force a full poll next cycle so that we can see the state of the
			// sub.
			sub.generationCount = 0
			sub.status = previousStatus
			return
		}
	}
	if previousStatus == statusDisruptionRequested ||
		previousStatus == statusDisruptionDenied {
		// Re-evaluate the disruption status from the state the sub reported.
		switch reply.DisruptionState {
		case subproto.DisruptionStateAnytime:
			sub.generationCount = 0
		case subproto.DisruptionStatePermitted:
			sub.generationCount = 0
		case subproto.DisruptionStateRequested:
			previousStatus = statusDisruptionRequested
		case subproto.DisruptionStateDenied:
			previousStatus = statusDisruptionDenied
		}
	}
	if sub.fileSystem == nil {
		// Without a file-system nothing can be computed: keep the (possibly
		// adjusted) previous status.
		sub.status = previousStatus
		return
	}
	if idle, status := sub.fetchMissingObjects(srpcClient, sub.requiredImage,
		reply.FreeSpace, true); !idle {
		sub.status = status
		sub.reclaim()
		return
	}
	sub.status = statusComputingUpdate
	if idle, status := sub.sendUpdate(srpcClient); !idle {
		sub.status = status
		sub.reclaim()
		return
	}
	// Fetch objects for the planned image. A missing planned image or a lack
	// of free space does not stop the sub from being reported as synced.
	if idle, status := sub.fetchMissingObjects(srpcClient, sub.plannedImage,
		reply.FreeSpace, false); !idle {
		if status != statusImageNotReady && status != statusNotEnoughFreeSpace {
			sub.status = status
			sub.reclaim()
			return
		}
	}
	if previousStatus == statusWaitingForNextFullPoll &&
		!sub.lastUpdateTime.IsZero() {
		sub.lastSyncTime = time.Now()
	}
	sub.status = statusSynced
	sub.cleanup(srpcClient)
	sub.reclaim()
}
   549  
   550  func (sub *Sub) reclaim() {
   551  	sub.fileSystem = nil  // Mark memory for reclaim.
   552  	sub.objectCache = nil // Mark memory for reclaim.
   553  }
   554  
   555  func (sub *Sub) updateConfiguration(srpcClient *srpc.Client,
   556  	pollReply subproto.PollResponse) {
   557  	if !*updateConfigurationsForSubs {
   558  		return
   559  	}
   560  	if pollReply.ScanCount < 1 {
   561  		return
   562  	}
   563  	sub.herd.RLockWithTimeout(time.Minute)
   564  	newConf := sub.herd.configurationForSubs
   565  	sub.herd.RUnlock()
   566  	if newConf.CpuPercent < 1 {
   567  		newConf.CpuPercent = pollReply.CurrentConfiguration.CpuPercent
   568  	}
   569  	if newConf.NetworkSpeedPercent < 1 {
   570  		newConf.NetworkSpeedPercent =
   571  			pollReply.CurrentConfiguration.NetworkSpeedPercent
   572  	}
   573  	if newConf.ScanSpeedPercent < 1 {
   574  		newConf.ScanSpeedPercent =
   575  			pollReply.CurrentConfiguration.ScanSpeedPercent
   576  	}
   577  	if compareConfigs(pollReply.CurrentConfiguration, newConf) {
   578  		return
   579  	}
   580  	if err := client.SetConfiguration(srpcClient, newConf); err != nil {
   581  		srpcClient.Close()
   582  		logger := sub.herd.logger
   583  		logger.Printf("Error setting configuration for sub: %s: %s\n",
   584  			sub, err)
   585  		return
   586  	}
   587  }
   588  
   589  func compareConfigs(oldConf, newConf subproto.Configuration) bool {
   590  	if newConf.CpuPercent != oldConf.CpuPercent {
   591  		return false
   592  	}
   593  	if newConf.NetworkSpeedPercent != oldConf.NetworkSpeedPercent {
   594  		return false
   595  	}
   596  	if newConf.ScanSpeedPercent != oldConf.ScanSpeedPercent {
   597  		return false
   598  	}
   599  	if len(newConf.ScanExclusionList) != len(oldConf.ScanExclusionList) {
   600  		return false
   601  	}
   602  	for index, newString := range newConf.ScanExclusionList {
   603  		if newString != oldConf.ScanExclusionList[index] {
   604  			return false
   605  		}
   606  	}
   607  	return true
   608  }
   609  
   610  // Returns true if all required objects are available.
   611  func (sub *Sub) fetchMissingObjects(srpcClient *srpc.Client, img *image.Image,
   612  	freeSpace *uint64, pushComputedFiles bool) (
   613  	bool, subStatus) {
   614  	if img == nil {
   615  		return false, statusImageNotReady
   616  	}
   617  	logger := sub.herd.logger
   618  	subObj := lib.Sub{
   619  		Hostname:       sub.mdb.Hostname,
   620  		Client:         srpcClient,
   621  		FileSystem:     sub.fileSystem,
   622  		ComputedInodes: sub.computedInodes,
   623  		ObjectCache:    sub.objectCache,
   624  		ObjectGetter:   sub.herd.objectServer}
   625  	objectsToFetch, objectsToPush := lib.BuildMissingLists(subObj, img,
   626  		pushComputedFiles, false, logger)
   627  	if objectsToPush == nil {
   628  		return false, statusMissingComputedFile
   629  	}
   630  	var returnAvailable bool = true
   631  	var returnStatus subStatus = statusSynced
   632  	if len(objectsToFetch) > 0 {
   633  		if !sub.checkForEnoughSpace(freeSpace, objectsToFetch) {
   634  			return false, statusNotEnoughFreeSpace
   635  		}
   636  		logger.Printf("Calling %s:Subd.Fetch() for: %d objects\n",
   637  			sub, len(objectsToFetch))
   638  		err := client.Fetch(srpcClient, sub.herd.imageManager.String(),
   639  			objectcache.ObjectMapToCache(objectsToFetch))
   640  		if err != nil {
   641  			srpcClient.Close()
   642  			logger.Printf("Error calling %s:Subd.Fetch(): %s\n", sub, err)
   643  			if err == srpc.ErrorAccessToMethodDenied {
   644  				return false, statusFetchDenied
   645  			}
   646  			return false, statusFailedToFetch
   647  		}
   648  		returnAvailable = false
   649  		returnStatus = statusFetching
   650  	}
   651  	if len(objectsToPush) > 0 {
   652  		sub.herd.cpuSharer.GrabSemaphore(sub.herd.pushSemaphore)
   653  		defer func() { <-sub.herd.pushSemaphore }()
   654  		sub.status = statusPushing
   655  		err := lib.PushObjects(subObj, objectsToPush, logger)
   656  		if err != nil {
   657  			if err == srpc.ErrorAccessToMethodDenied {
   658  				return false, statusPushDenied
   659  			}
   660  			if err == lib.ErrorFailedToGetObject {
   661  				return false, statusFailedToGetObject
   662  			}
   663  			return false, statusFailedToPush
   664  		}
   665  		if returnAvailable {
   666  			// Update local copy of objectcache, since there will not be
   667  			// another Poll() before the update computation.
   668  			for hashVal := range objectsToPush {
   669  				sub.objectCache = append(sub.objectCache, hashVal)
   670  			}
   671  		}
   672  	}
   673  	return returnAvailable, returnStatus
   674  }
   675  
   676  // Returns true if no update needs to be performed.
   677  func (sub *Sub) sendUpdate(srpcClient *srpc.Client) (bool, subStatus) {
   678  	logger := sub.herd.logger
   679  	var request subproto.UpdateRequest
   680  	var reply subproto.UpdateResponse
   681  	if idle, missing := sub.buildUpdateRequest(&request); missing {
   682  		return false, statusMissingComputedFile
   683  	} else if idle {
   684  		return true, statusSynced
   685  	}
   686  	if sub.mdb.DisableUpdates || sub.herd.updatesDisabledReason != "" {
   687  		return false, statusUpdatesDisabled
   688  	}
   689  	if !sub.pendingSafetyClear {
   690  		// Perform a cheap safety check: if over half the inodes will be deleted
   691  		// then mark the update as unsafe.
   692  		if sub.checkForUnsafeChange(request) {
   693  			return false, statusUnsafeUpdate
   694  		}
   695  	}
   696  	if _, ok := sub.mdb.Tags["ForceDisruptiveUpdate"]; ok {
   697  		request.ForceDisruption = true
   698  	}
   699  	if sub.pendingForceDisruptiveUpdate {
   700  		request.ForceDisruption = true
   701  	}
   702  	sub.status = statusSendingUpdate
   703  	sub.lastUpdateTime = time.Now()
   704  	logger.Printf("Calling %s:Subd.Update() for image: %s\n",
   705  		sub, sub.requiredImageName)
   706  	if err := client.CallUpdate(srpcClient, request, &reply); err != nil {
   707  		srpcClient.Close()
   708  		logger.Printf("Error calling %s:Subd.Update(): %s\n", sub, err)
   709  		if err == srpc.ErrorAccessToMethodDenied {
   710  			return false, statusUpdateDenied
   711  		}
   712  		return false, statusFailedToUpdate
   713  	}
   714  	sub.pendingSafetyClear = false
   715  	sub.pendingForceDisruptiveUpdate = false
   716  	return false, statusUpdating
   717  }
   718  
   719  // Returns true if the change is unsafe (very large number of deletions).
   720  func (sub *Sub) checkForUnsafeChange(request subproto.UpdateRequest) bool {
   721  	if sub.requiredImage.Filter == nil {
   722  		return false // Sparse image: no deletions.
   723  	}
   724  	if _, ok := sub.mdb.Tags["DisableSafetyCheck"]; ok {
   725  		return false // This sub doesn't need a safety check.
   726  	}
   727  	if len(sub.requiredImage.FileSystem.InodeTable) <
   728  		len(sub.fileSystem.InodeTable)>>1 {
   729  		return true
   730  	}
   731  	if len(request.PathsToDelete) > len(sub.fileSystem.InodeTable)>>1 {
   732  		return true
   733  	}
   734  	return false
   735  }
   736  
   737  // cleanup will tell the Sub to remove unused objects and that and disruptive
   738  // updates have completed.
   739  func (sub *Sub) cleanup(srpcClient *srpc.Client) {
   740  	logger := sub.herd.logger
   741  	unusedObjects := make(map[hash.Hash]bool)
   742  	for _, hash := range sub.objectCache {
   743  		unusedObjects[hash] = false // Potential cleanup candidate.
   744  	}
   745  	for _, inode := range sub.fileSystem.InodeTable {
   746  		if inode, ok := inode.(*filesystem.RegularInode); ok {
   747  			if inode.Size > 0 {
   748  				if _, ok := unusedObjects[inode.Hash]; ok {
   749  					unusedObjects[inode.Hash] = true // Must clean this one up.
   750  				}
   751  			}
   752  		}
   753  	}
   754  	image := sub.plannedImage
   755  	if image != nil {
   756  		for _, inode := range image.FileSystem.InodeTable {
   757  			if inode, ok := inode.(*filesystem.RegularInode); ok {
   758  				if inode.Size > 0 {
   759  					if clean, ok := unusedObjects[inode.Hash]; !clean && ok {
   760  						delete(unusedObjects, inode.Hash)
   761  					}
   762  				}
   763  			}
   764  		}
   765  	}
   766  	if len(unusedObjects) < 1 &&
   767  		sub.lastDisruptionState == subproto.DisruptionStateAnytime {
   768  		return
   769  	}
   770  	hashes := make([]hash.Hash, 0, len(unusedObjects))
   771  	for hash := range unusedObjects {
   772  		hashes = append(hashes, hash)
   773  	}
   774  	if err := client.Cleanup(srpcClient, hashes); err != nil {
   775  		srpcClient.Close()
   776  		logger.Printf("Error calling %s:Subd.Cleanup(): %s\n", sub, err)
   777  	}
   778  }
   779  
   780  func (sub *Sub) checkForEnoughSpace(freeSpace *uint64,
   781  	objects map[hash.Hash]uint64) bool {
   782  	if freeSpace == nil {
   783  		sub.freeSpaceThreshold = nil
   784  		return true // Don't know, assume OK.
   785  	}
   786  	var totalUsage uint64
   787  	for _, size := range objects {
   788  		usage := (size >> 12) << 12
   789  		if usage < size {
   790  			usage += 1 << 12
   791  		}
   792  		totalUsage += usage
   793  	}
   794  	if *freeSpace > totalUsage {
   795  		sub.freeSpaceThreshold = nil
   796  		return true
   797  	}
   798  	sub.freeSpaceThreshold = &totalUsage
   799  	return false
   800  }
   801  
   802  func (sub *Sub) clearSafetyShutoff(authInfo *srpc.AuthInformation) error {
   803  	if sub.status != statusUnsafeUpdate {
   804  		return errors.New("no pending unsafe update")
   805  	}
   806  	if !sub.checkAdminAccess(authInfo) {
   807  		return errors.New("no access to sub")
   808  	}
   809  	sub.pendingSafetyClear = true
   810  	return nil
   811  }
   812  
   813  func (sub *Sub) checkCancel() bool {
   814  	select {
   815  	case <-sub.cancelChannel:
   816  		return true
   817  	default:
   818  		return false
   819  	}
   820  }
   821  
   822  func (sub *Sub) forceDisruptiveUpdate(authInfo *srpc.AuthInformation) error {
   823  	switch sub.status {
   824  	case statusDisruptionRequested:
   825  	case statusDisruptionDenied:
   826  	default:
   827  		return errors.New("not waiting for disruptive update permission")
   828  	}
   829  	if !sub.checkAdminAccess(authInfo) {
   830  		return errors.New("no access to sub")
   831  	}
   832  	sub.pendingForceDisruptiveUpdate = true
   833  	return nil
   834  }
   835  
   836  func (sub *Sub) sendCancel() {
   837  	select {
   838  	case sub.cancelChannel <- struct{}{}:
   839  	default:
   840  	}
   841  }