github.com/Cloud-Foundations/Dominator@v0.3.4/dom/herd/sub.go (about) 1 package herd 2 3 import ( 4 "errors" 5 "flag" 6 "fmt" 7 "io" 8 "net" 9 "strings" 10 "time" 11 12 "github.com/Cloud-Foundations/Dominator/dom/lib" 13 "github.com/Cloud-Foundations/Dominator/lib/constants" 14 filegenclient "github.com/Cloud-Foundations/Dominator/lib/filegen/client" 15 "github.com/Cloud-Foundations/Dominator/lib/filesystem" 16 "github.com/Cloud-Foundations/Dominator/lib/hash" 17 "github.com/Cloud-Foundations/Dominator/lib/image" 18 "github.com/Cloud-Foundations/Dominator/lib/objectcache" 19 "github.com/Cloud-Foundations/Dominator/lib/resourcepool" 20 "github.com/Cloud-Foundations/Dominator/lib/srpc" 21 subproto "github.com/Cloud-Foundations/Dominator/proto/sub" 22 "github.com/Cloud-Foundations/Dominator/sub/client" 23 ) 24 25 var ( 26 updateConfigurationsForSubs = flag.Bool("updateConfigurationsForSubs", 27 true, "If true, update the configurations for all subs") 28 logUnknownSubConnectErrors = flag.Bool("logUnknownSubConnectErrors", false, 29 "If true, log unknown sub connection errors") 30 showIP = flag.Bool("showIP", false, 31 "If true, prefer to show IP address from MDB if available") 32 useIP = flag.Bool("useIP", true, 33 "If true, prefer to use IP address from MDB if available") 34 35 subPortNumber = fmt.Sprintf(":%d", constants.SubPortNumber) 36 zeroHash hash.Hash 37 ) 38 39 func (sub *Sub) string() string { 40 if *showIP && sub.mdb.IpAddress != "" { 41 return sub.mdb.IpAddress 42 } 43 return sub.mdb.Hostname 44 } 45 46 func (sub *Sub) address() string { 47 if *useIP && sub.mdb.IpAddress != "" { 48 hostInstance := strings.SplitN(sub.mdb.Hostname, "*", 2) 49 if len(hostInstance) > 1 { 50 return sub.mdb.IpAddress + "*" + hostInstance[1] + subPortNumber 51 } 52 return sub.mdb.IpAddress + subPortNumber 53 } 54 return sub.mdb.Hostname + subPortNumber 55 } 56 57 // Returns true if the principal described by authInfo has administrative access 58 // to the sub. 59 func (sub *Sub) checkAdminAccess(authInfo *srpc.AuthInformation) bool { 60 if authInfo == nil { 61 return false 62 } 63 if authInfo.HaveMethodAccess { 64 return true 65 } 66 if sub.clientResource == nil { 67 return false 68 } 69 srpcClient, err := sub.clientResource.GetHTTPWithDialer(sub.cancelChannel, 70 sub.herd.dialer) 71 if err != nil { 72 return false 73 } 74 defer srpcClient.Put() 75 conf, err := client.GetConfiguration(srpcClient) 76 if err != nil { 77 return false 78 } 79 for _, group := range conf.OwnerGroups { 80 if _, ok := authInfo.GroupList[group]; ok { 81 return true 82 } 83 } 84 if authInfo.Username != "" { 85 for _, user := range conf.OwnerUsers { 86 if user == authInfo.Username { 87 return true 88 } 89 } 90 } 91 return false 92 } 93 94 func (sub *Sub) getComputedFiles(im *image.Image) []filegenclient.ComputedFile { 95 if im == nil { 96 return nil 97 } 98 numComputed := im.FileSystem.NumComputedRegularInodes() 99 if numComputed < 1 { 100 return nil 101 } 102 computedFiles := make([]filegenclient.ComputedFile, 0, numComputed) 103 inodeToFilenamesTable := im.FileSystem.InodeToFilenamesTable() 104 for inum, inode := range im.FileSystem.InodeTable { 105 if inode, ok := inode.(*filesystem.ComputedRegularInode); ok { 106 if filenames, ok := inodeToFilenamesTable[inum]; ok { 107 if len(filenames) == 1 { 108 computedFiles = append(computedFiles, 109 filegenclient.ComputedFile{filenames[0], inode.Source}) 110 } 111 } 112 } 113 } 114 return computedFiles 115 } 116 117 func (sub *Sub) tryMakeBusy() bool { 118 sub.busyFlagMutex.Lock() 119 defer sub.busyFlagMutex.Unlock() 120 if sub.busy { 121 return false 122 } 123 sub.busyStartTime = time.Now() 124 sub.busy = true 125 return true 126 } 127 128 func (sub *Sub) makeUnbusy() { 129 sub.busyFlagMutex.Lock() 130 defer sub.busyFlagMutex.Unlock() 131 sub.busyStopTime = time.Now() 132 sub.busy = false 133 } 134 135 func (sub *Sub) connectAndPoll() { 136 sub.loadConfiguration() 137 if sub.processFileUpdates() { 138 sub.generationCount = 0 // Force a full poll. 139 } 140 sub.deletingFlagMutex.Lock() 141 if sub.deleting { 142 sub.deletingFlagMutex.Unlock() 143 return 144 } 145 if sub.clientResource == nil { 146 sub.clientResource = srpc.NewClientResource("tcp", sub.address()) 147 } 148 sub.deletingFlagMutex.Unlock() 149 previousStatus := sub.status 150 sub.status = statusConnecting 151 timer := time.AfterFunc(time.Second, func() { 152 sub.publishedStatus = sub.status 153 }) 154 defer func() { 155 timer.Stop() 156 sub.publishedStatus = sub.status 157 switch sub.status { 158 case statusUnknown: 159 case statusConnecting: 160 case statusDNSError: 161 case statusNoRouteToHost: 162 case statusConnectionRefused, 163 statusConnectTimeout, 164 statusFailedToConnect: 165 sub.herd.addSubToInstallerQueue(sub.mdb.Hostname) 166 default: 167 sub.herd.removeSubFromInstallerQueue(sub.mdb.Hostname) 168 } 169 }() 170 sub.lastConnectionStartTime = time.Now() 171 srpcClient, err := sub.clientResource.GetHTTPWithDialer(sub.cancelChannel, 172 sub.herd.dialer) 173 dialReturnedTime := time.Now() 174 if err != nil { 175 sub.isInsecure = false 176 sub.pollTime = time.Time{} 177 if err == resourcepool.ErrorResourceLimitExceeded { 178 return 179 } 180 if err, ok := err.(*net.OpError); ok { 181 if _, ok := err.Err.(*net.DNSError); ok { 182 sub.status = statusDNSError 183 return 184 } 185 if err.Timeout() { 186 sub.status = statusConnectTimeout 187 return 188 } 189 } 190 if err == srpc.ErrorConnectionRefused { 191 sub.status = statusConnectionRefused 192 return 193 } 194 if err == srpc.ErrorNoRouteToHost { 195 sub.status = statusNoRouteToHost 196 return 197 } 198 if err == srpc.ErrorMissingCertificate { 199 sub.lastReachableTime = dialReturnedTime 200 sub.status = statusMissingCertificate 201 return 202 } 203 if err == srpc.ErrorBadCertificate { 204 sub.lastReachableTime = dialReturnedTime 205 sub.status = statusBadCertificate 206 return 207 } 208 sub.status = statusFailedToConnect 209 if *logUnknownSubConnectErrors { 210 sub.herd.logger.Println(err) 211 } 212 return 213 } 214 defer srpcClient.Put() 215 if srpcClient.IsEncrypted() { 216 sub.isInsecure = false 217 } else { 218 sub.isInsecure = true 219 } 220 sub.lastReachableTime = dialReturnedTime 221 sub.lastConnectionSucceededTime = dialReturnedTime 222 sub.lastConnectDuration = 223 sub.lastConnectionSucceededTime.Sub(sub.lastConnectionStartTime) 224 connectDistribution.Add(sub.lastConnectDuration) 225 waitStartTime := time.Now() 226 sub.herd.cpuSharer.ReleaseCpu() 227 select { 228 case sub.herd.pollSemaphore <- struct{}{}: 229 sub.herd.cpuSharer.GrabCpu() 230 break 231 case <-sub.cancelChannel: 232 sub.herd.cpuSharer.GrabCpu() 233 return 234 } 235 pollWaitTimeDistribution.Add(time.Since(waitStartTime)) 236 sub.status = statusPolling 237 sub.poll(srpcClient, previousStatus) 238 <-sub.herd.pollSemaphore 239 } 240 241 func (sub *Sub) loadConfiguration() { 242 // Get a stable copy of the configuration. 243 newRequiredImageName := sub.mdb.RequiredImage 244 if newRequiredImageName == "" { 245 newRequiredImageName = sub.herd.defaultImageName 246 } 247 if newRequiredImageName != sub.requiredImageName { 248 sub.computedInodes = nil 249 } 250 sub.herd.cpuSharer.ReleaseCpu() 251 defer sub.herd.cpuSharer.GrabCpu() 252 sub.requiredImageName = newRequiredImageName 253 sub.requiredImage = sub.herd.imageManager.GetNoError(sub.requiredImageName) 254 sub.plannedImageName = sub.mdb.PlannedImage 255 sub.plannedImage = sub.herd.imageManager.GetNoError(sub.plannedImageName) 256 } 257 258 func (sub *Sub) processFileUpdates() bool { 259 haveUpdates := false 260 for { 261 image := sub.requiredImage 262 if image != nil && sub.computedInodes == nil { 263 sub.computedInodes = make(map[string]*filesystem.RegularInode) 264 sub.deletingFlagMutex.Lock() 265 if sub.deleting { 266 sub.deletingFlagMutex.Unlock() 267 return false 268 } 269 computedFiles := sub.getComputedFiles(image) 270 sub.herd.cpuSharer.ReleaseCpu() 271 sub.herd.computedFilesManager.Update( 272 filegenclient.Machine{sub.mdb, computedFiles}) 273 sub.herd.cpuSharer.GrabCpu() 274 sub.deletingFlagMutex.Unlock() 275 } 276 select { 277 case fileInfos := <-sub.fileUpdateChannel: 278 if image == nil { 279 continue 280 } 281 filenameToInodeTable := image.FileSystem.FilenameToInodeTable() 282 for _, fileInfo := range fileInfos { 283 if fileInfo.Hash == zeroHash { 284 continue // No object. 285 } 286 inum, ok := filenameToInodeTable[fileInfo.Pathname] 287 if !ok { 288 continue 289 } 290 genericInode, ok := image.FileSystem.InodeTable[inum] 291 if !ok { 292 continue 293 } 294 cInode, ok := genericInode.(*filesystem.ComputedRegularInode) 295 if !ok { 296 continue 297 } 298 rInode := &filesystem.RegularInode{ 299 Mode: cInode.Mode, 300 Uid: cInode.Uid, 301 Gid: cInode.Gid, 302 MtimeSeconds: -1, // The time is set during the compute. 303 Size: fileInfo.Length, 304 Hash: fileInfo.Hash, 305 } 306 sub.computedInodes[fileInfo.Pathname] = rInode 307 haveUpdates = true 308 } 309 default: 310 return haveUpdates 311 } 312 } 313 } 314 315 func (sub *Sub) poll(srpcClient *srpc.Client, previousStatus subStatus) { 316 // If the planned image has just become available, force a full poll. 317 if previousStatus == statusSynced && 318 !sub.havePlannedImage && 319 sub.plannedImage != nil { 320 sub.havePlannedImage = true 321 sub.generationCount = 0 // Force a full poll. 322 } 323 // If the computed files have changed since the last sync, force a full poll 324 if previousStatus == statusSynced && 325 sub.computedFilesChangeTime.After(sub.lastSyncTime) { 326 sub.generationCount = 0 // Force a full poll. 327 } 328 // If the last update was disabled and updates are enabled now, force a full 329 // poll. 330 if previousStatus == statusUpdatesDisabled && 331 sub.herd.updatesDisabledReason == "" && !sub.mdb.DisableUpdates { 332 sub.generationCount = 0 // Force a full poll. 333 } 334 // If the last update was disabled due to a safety check and there is a 335 // pending SafetyClear, force a full poll to re-compute the update. 336 if previousStatus == statusUnsafeUpdate && sub.pendingSafetyClear { 337 sub.generationCount = 0 // Force a full poll. 338 } 339 // If the last update failed because disruption was not permitted and there 340 // is a pending ForceDisruption, force a full poll to re-compute the update. 341 if (previousStatus == statusDisruptionRequested || 342 previousStatus == statusDisruptionDenied) && 343 sub.pendingForceDisruptiveUpdate { 344 sub.generationCount = 0 // Force a full poll. 345 } 346 var request subproto.PollRequest 347 request.HaveGeneration = sub.generationCount 348 var reply subproto.PollResponse 349 haveImage := false 350 if sub.requiredImage == nil && sub.plannedImage == nil { 351 request.ShortPollOnly = true 352 // Ensure a full poll when the image becomes available later. This will 353 // cover the special case when an image expiration is extended, which 354 // leads to the sub showing "image not ready" until the next generation 355 // increment. 356 sub.generationCount = 0 357 } else { 358 haveImage = true 359 } 360 logger := sub.herd.logger 361 sub.lastPollStartTime = time.Now() 362 if err := client.CallPoll(srpcClient, request, &reply); err != nil { 363 srpcClient.Close() 364 if err == io.EOF { 365 return 366 } 367 sub.pollTime = time.Time{} 368 if err == srpc.ErrorAccessToMethodDenied { 369 sub.status = statusPollDenied 370 } else { 371 sub.status = statusFailedToPoll 372 } 373 logger.Printf("Error calling %s.Poll(): %s\n", sub, err) 374 return 375 } 376 sub.lastDisruptionState = reply.DisruptionState 377 sub.lastPollSucceededTime = time.Now() 378 sub.lastSuccessfulImageName = reply.LastSuccessfulImageName 379 sub.lastNote = reply.LastNote 380 sub.lastWriteError = reply.LastWriteError 381 sub.systemUptime = reply.SystemUptime 382 if reply.GenerationCount == 0 { 383 sub.reclaim() 384 sub.generationCount = 0 385 } 386 sub.lastScanDuration = reply.DurationOfLastScan 387 if fs := reply.FileSystem; fs == nil { 388 sub.lastPollWasFull = false 389 sub.lastShortPollDuration = 390 sub.lastPollSucceededTime.Sub(sub.lastPollStartTime) 391 shortPollDistribution.Add(sub.lastShortPollDuration) 392 if !sub.startTime.Equal(reply.StartTime) { 393 sub.generationCount = 0 // Sub has restarted: force a full poll. 394 } 395 if sub.freeSpaceThreshold != nil && reply.FreeSpace != nil { 396 if *reply.FreeSpace > *sub.freeSpaceThreshold { 397 sub.generationCount = 0 // Force a full poll for next time. 398 } 399 } 400 } else { 401 sub.lastPollWasFull = true 402 sub.freeSpaceThreshold = nil 403 if err := fs.RebuildInodePointers(); err != nil { 404 sub.status = statusFailedToPoll 405 logger.Printf("Error building pointers for: %s %s\n", sub, err) 406 return 407 } 408 fs.BuildEntryMap() 409 sub.fileSystem = fs 410 sub.objectCache = reply.ObjectCache 411 sub.generationCount = reply.GenerationCount 412 sub.lastFullPollDuration = 413 sub.lastPollSucceededTime.Sub(sub.lastPollStartTime) 414 fullPollDistribution.Add(sub.lastFullPollDuration) 415 } 416 sub.startTime = reply.StartTime 417 sub.pollTime = reply.PollTime 418 sub.updateConfiguration(srpcClient, reply) 419 if reply.FetchInProgress { 420 sub.status = statusFetching 421 return 422 } 423 if reply.UpdateInProgress { 424 sub.status = statusUpdating 425 return 426 } 427 if reply.LastWriteError != "" { 428 sub.status = statusUnwritable 429 sub.reclaim() 430 return 431 } 432 if reply.GenerationCount < 1 { 433 sub.status = statusSubNotReady 434 return 435 } 436 if reply.LockedByAnotherClient { 437 sub.status = statusLocked 438 sub.reclaim() 439 return 440 } 441 if previousStatus == statusLocked { // Not locked anymore, but was locked. 442 if sub.fileSystem == nil { 443 sub.generationCount = 0 // Force a full poll next cycle. 444 return 445 } 446 } 447 if previousStatus == statusFetching && reply.LastFetchError != "" { 448 logger.Printf("Fetch failure for: %s: %s\n", sub, reply.LastFetchError) 449 sub.status = statusFailedToFetch 450 if sub.fileSystem == nil { 451 sub.generationCount = 0 // Force a full poll next cycle. 452 return 453 } 454 } 455 if previousStatus == statusUpdating { 456 // Transition from updating to update ended (may be partial/failed). 457 switch reply.LastUpdateError { 458 case "": 459 sub.status = statusWaitingForNextFullPoll 460 case subproto.ErrorDisruptionPending: 461 sub.status = statusDisruptionRequested 462 case subproto.ErrorDisruptionDenied: 463 sub.status = statusDisruptionDenied 464 default: 465 logger.Printf("Update failure for: %s: %s\n", 466 sub, reply.LastUpdateError) 467 sub.status = statusFailedToUpdate 468 } 469 sub.scanCountAtLastUpdateEnd = reply.ScanCount 470 sub.reclaim() 471 return 472 } 473 if sub.checkCancel() { 474 // Configuration change pending: skip further processing. Do not reclaim 475 // file-system and objectcache data: it will speed up the next Poll. 476 return 477 } 478 if !haveImage { 479 if sub.requiredImageName == "" { 480 sub.status = statusImageUndefined 481 } else { 482 sub.status = statusImageNotReady 483 } 484 return 485 } 486 if previousStatus == statusFailedToUpdate || 487 previousStatus == statusWaitingForNextFullPoll { 488 if sub.scanCountAtLastUpdateEnd == reply.ScanCount { 489 // Need to wait until sub has performed a new scan. 490 if sub.fileSystem != nil { 491 sub.reclaim() 492 } 493 sub.status = previousStatus 494 return 495 } 496 if sub.fileSystem == nil { 497 // Force a full poll next cycle so that we can see the state of the 498 // sub. 499 sub.generationCount = 0 500 sub.status = previousStatus 501 return 502 } 503 } 504 if previousStatus == statusDisruptionRequested || 505 previousStatus == statusDisruptionDenied { 506 switch reply.DisruptionState { 507 case subproto.DisruptionStateAnytime: 508 sub.generationCount = 0 509 case subproto.DisruptionStatePermitted: 510 sub.generationCount = 0 511 case subproto.DisruptionStateRequested: 512 previousStatus = statusDisruptionRequested 513 case subproto.DisruptionStateDenied: 514 previousStatus = statusDisruptionDenied 515 } 516 } 517 if sub.fileSystem == nil { 518 sub.status = previousStatus 519 return 520 } 521 if idle, status := sub.fetchMissingObjects(srpcClient, sub.requiredImage, 522 reply.FreeSpace, true); !idle { 523 sub.status = status 524 sub.reclaim() 525 return 526 } 527 sub.status = statusComputingUpdate 528 if idle, status := sub.sendUpdate(srpcClient); !idle { 529 sub.status = status 530 sub.reclaim() 531 return 532 } 533 if idle, status := sub.fetchMissingObjects(srpcClient, sub.plannedImage, 534 reply.FreeSpace, false); !idle { 535 if status != statusImageNotReady && status != statusNotEnoughFreeSpace { 536 sub.status = status 537 sub.reclaim() 538 return 539 } 540 } 541 if previousStatus == statusWaitingForNextFullPoll && 542 !sub.lastUpdateTime.IsZero() { 543 sub.lastSyncTime = time.Now() 544 } 545 sub.status = statusSynced 546 sub.cleanup(srpcClient) 547 sub.reclaim() 548 } 549 550 func (sub *Sub) reclaim() { 551 sub.fileSystem = nil // Mark memory for reclaim. 552 sub.objectCache = nil // Mark memory for reclaim. 553 } 554 555 func (sub *Sub) updateConfiguration(srpcClient *srpc.Client, 556 pollReply subproto.PollResponse) { 557 if !*updateConfigurationsForSubs { 558 return 559 } 560 if pollReply.ScanCount < 1 { 561 return 562 } 563 sub.herd.RLockWithTimeout(time.Minute) 564 newConf := sub.herd.configurationForSubs 565 sub.herd.RUnlock() 566 if newConf.CpuPercent < 1 { 567 newConf.CpuPercent = pollReply.CurrentConfiguration.CpuPercent 568 } 569 if newConf.NetworkSpeedPercent < 1 { 570 newConf.NetworkSpeedPercent = 571 pollReply.CurrentConfiguration.NetworkSpeedPercent 572 } 573 if newConf.ScanSpeedPercent < 1 { 574 newConf.ScanSpeedPercent = 575 pollReply.CurrentConfiguration.ScanSpeedPercent 576 } 577 if compareConfigs(pollReply.CurrentConfiguration, newConf) { 578 return 579 } 580 if err := client.SetConfiguration(srpcClient, newConf); err != nil { 581 srpcClient.Close() 582 logger := sub.herd.logger 583 logger.Printf("Error setting configuration for sub: %s: %s\n", 584 sub, err) 585 return 586 } 587 } 588 589 func compareConfigs(oldConf, newConf subproto.Configuration) bool { 590 if newConf.CpuPercent != oldConf.CpuPercent { 591 return false 592 } 593 if newConf.NetworkSpeedPercent != oldConf.NetworkSpeedPercent { 594 return false 595 } 596 if newConf.ScanSpeedPercent != oldConf.ScanSpeedPercent { 597 return false 598 } 599 if len(newConf.ScanExclusionList) != len(oldConf.ScanExclusionList) { 600 return false 601 } 602 for index, newString := range newConf.ScanExclusionList { 603 if newString != oldConf.ScanExclusionList[index] { 604 return false 605 } 606 } 607 return true 608 } 609 610 // Returns true if all required objects are available. 611 func (sub *Sub) fetchMissingObjects(srpcClient *srpc.Client, img *image.Image, 612 freeSpace *uint64, pushComputedFiles bool) ( 613 bool, subStatus) { 614 if img == nil { 615 return false, statusImageNotReady 616 } 617 logger := sub.herd.logger 618 subObj := lib.Sub{ 619 Hostname: sub.mdb.Hostname, 620 Client: srpcClient, 621 FileSystem: sub.fileSystem, 622 ComputedInodes: sub.computedInodes, 623 ObjectCache: sub.objectCache, 624 ObjectGetter: sub.herd.objectServer} 625 objectsToFetch, objectsToPush := lib.BuildMissingLists(subObj, img, 626 pushComputedFiles, false, logger) 627 if objectsToPush == nil { 628 return false, statusMissingComputedFile 629 } 630 var returnAvailable bool = true 631 var returnStatus subStatus = statusSynced 632 if len(objectsToFetch) > 0 { 633 if !sub.checkForEnoughSpace(freeSpace, objectsToFetch) { 634 return false, statusNotEnoughFreeSpace 635 } 636 logger.Printf("Calling %s:Subd.Fetch() for: %d objects\n", 637 sub, len(objectsToFetch)) 638 err := client.Fetch(srpcClient, sub.herd.imageManager.String(), 639 objectcache.ObjectMapToCache(objectsToFetch)) 640 if err != nil { 641 srpcClient.Close() 642 logger.Printf("Error calling %s:Subd.Fetch(): %s\n", sub, err) 643 if err == srpc.ErrorAccessToMethodDenied { 644 return false, statusFetchDenied 645 } 646 return false, statusFailedToFetch 647 } 648 returnAvailable = false 649 returnStatus = statusFetching 650 } 651 if len(objectsToPush) > 0 { 652 sub.herd.cpuSharer.GrabSemaphore(sub.herd.pushSemaphore) 653 defer func() { <-sub.herd.pushSemaphore }() 654 sub.status = statusPushing 655 err := lib.PushObjects(subObj, objectsToPush, logger) 656 if err != nil { 657 if err == srpc.ErrorAccessToMethodDenied { 658 return false, statusPushDenied 659 } 660 if err == lib.ErrorFailedToGetObject { 661 return false, statusFailedToGetObject 662 } 663 return false, statusFailedToPush 664 } 665 if returnAvailable { 666 // Update local copy of objectcache, since there will not be 667 // another Poll() before the update computation. 668 for hashVal := range objectsToPush { 669 sub.objectCache = append(sub.objectCache, hashVal) 670 } 671 } 672 } 673 return returnAvailable, returnStatus 674 } 675 676 // Returns true if no update needs to be performed. 677 func (sub *Sub) sendUpdate(srpcClient *srpc.Client) (bool, subStatus) { 678 logger := sub.herd.logger 679 var request subproto.UpdateRequest 680 var reply subproto.UpdateResponse 681 if idle, missing := sub.buildUpdateRequest(&request); missing { 682 return false, statusMissingComputedFile 683 } else if idle { 684 return true, statusSynced 685 } 686 if sub.mdb.DisableUpdates || sub.herd.updatesDisabledReason != "" { 687 return false, statusUpdatesDisabled 688 } 689 if !sub.pendingSafetyClear { 690 // Perform a cheap safety check: if over half the inodes will be deleted 691 // then mark the update as unsafe. 692 if sub.checkForUnsafeChange(request) { 693 return false, statusUnsafeUpdate 694 } 695 } 696 if _, ok := sub.mdb.Tags["ForceDisruptiveUpdate"]; ok { 697 request.ForceDisruption = true 698 } 699 if sub.pendingForceDisruptiveUpdate { 700 request.ForceDisruption = true 701 } 702 sub.status = statusSendingUpdate 703 sub.lastUpdateTime = time.Now() 704 logger.Printf("Calling %s:Subd.Update() for image: %s\n", 705 sub, sub.requiredImageName) 706 if err := client.CallUpdate(srpcClient, request, &reply); err != nil { 707 srpcClient.Close() 708 logger.Printf("Error calling %s:Subd.Update(): %s\n", sub, err) 709 if err == srpc.ErrorAccessToMethodDenied { 710 return false, statusUpdateDenied 711 } 712 return false, statusFailedToUpdate 713 } 714 sub.pendingSafetyClear = false 715 sub.pendingForceDisruptiveUpdate = false 716 return false, statusUpdating 717 } 718 719 // Returns true if the change is unsafe (very large number of deletions). 720 func (sub *Sub) checkForUnsafeChange(request subproto.UpdateRequest) bool { 721 if sub.requiredImage.Filter == nil { 722 return false // Sparse image: no deletions. 723 } 724 if _, ok := sub.mdb.Tags["DisableSafetyCheck"]; ok { 725 return false // This sub doesn't need a safety check. 726 } 727 if len(sub.requiredImage.FileSystem.InodeTable) < 728 len(sub.fileSystem.InodeTable)>>1 { 729 return true 730 } 731 if len(request.PathsToDelete) > len(sub.fileSystem.InodeTable)>>1 { 732 return true 733 } 734 return false 735 } 736 737 // cleanup will tell the Sub to remove unused objects and that and disruptive 738 // updates have completed. 739 func (sub *Sub) cleanup(srpcClient *srpc.Client) { 740 logger := sub.herd.logger 741 unusedObjects := make(map[hash.Hash]bool) 742 for _, hash := range sub.objectCache { 743 unusedObjects[hash] = false // Potential cleanup candidate. 744 } 745 for _, inode := range sub.fileSystem.InodeTable { 746 if inode, ok := inode.(*filesystem.RegularInode); ok { 747 if inode.Size > 0 { 748 if _, ok := unusedObjects[inode.Hash]; ok { 749 unusedObjects[inode.Hash] = true // Must clean this one up. 750 } 751 } 752 } 753 } 754 image := sub.plannedImage 755 if image != nil { 756 for _, inode := range image.FileSystem.InodeTable { 757 if inode, ok := inode.(*filesystem.RegularInode); ok { 758 if inode.Size > 0 { 759 if clean, ok := unusedObjects[inode.Hash]; !clean && ok { 760 delete(unusedObjects, inode.Hash) 761 } 762 } 763 } 764 } 765 } 766 if len(unusedObjects) < 1 && 767 sub.lastDisruptionState == subproto.DisruptionStateAnytime { 768 return 769 } 770 hashes := make([]hash.Hash, 0, len(unusedObjects)) 771 for hash := range unusedObjects { 772 hashes = append(hashes, hash) 773 } 774 if err := client.Cleanup(srpcClient, hashes); err != nil { 775 srpcClient.Close() 776 logger.Printf("Error calling %s:Subd.Cleanup(): %s\n", sub, err) 777 } 778 } 779 780 func (sub *Sub) checkForEnoughSpace(freeSpace *uint64, 781 objects map[hash.Hash]uint64) bool { 782 if freeSpace == nil { 783 sub.freeSpaceThreshold = nil 784 return true // Don't know, assume OK. 785 } 786 var totalUsage uint64 787 for _, size := range objects { 788 usage := (size >> 12) << 12 789 if usage < size { 790 usage += 1 << 12 791 } 792 totalUsage += usage 793 } 794 if *freeSpace > totalUsage { 795 sub.freeSpaceThreshold = nil 796 return true 797 } 798 sub.freeSpaceThreshold = &totalUsage 799 return false 800 } 801 802 func (sub *Sub) clearSafetyShutoff(authInfo *srpc.AuthInformation) error { 803 if sub.status != statusUnsafeUpdate { 804 return errors.New("no pending unsafe update") 805 } 806 if !sub.checkAdminAccess(authInfo) { 807 return errors.New("no access to sub") 808 } 809 sub.pendingSafetyClear = true 810 return nil 811 } 812 813 func (sub *Sub) checkCancel() bool { 814 select { 815 case <-sub.cancelChannel: 816 return true 817 default: 818 return false 819 } 820 } 821 822 func (sub *Sub) forceDisruptiveUpdate(authInfo *srpc.AuthInformation) error { 823 switch sub.status { 824 case statusDisruptionRequested: 825 case statusDisruptionDenied: 826 default: 827 return errors.New("not waiting for disruptive update permission") 828 } 829 if !sub.checkAdminAccess(authInfo) { 830 return errors.New("no access to sub") 831 } 832 sub.pendingForceDisruptiveUpdate = true 833 return nil 834 } 835 836 func (sub *Sub) sendCancel() { 837 select { 838 case sub.cancelChannel <- struct{}{}: 839 default: 840 } 841 }