github.com/Lephar/snapd@v0.0.0-20210825215435-c7fba9cef4d2/overlord/snapstate/autorefresh_gating.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2021 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package snapstate 21 22 import ( 23 "encoding/json" 24 "fmt" 25 "os" 26 "sort" 27 "strings" 28 "time" 29 30 "github.com/snapcore/snapd/interfaces" 31 "github.com/snapcore/snapd/interfaces/mount" 32 "github.com/snapcore/snapd/logger" 33 "github.com/snapcore/snapd/overlord/auth" 34 "github.com/snapcore/snapd/overlord/ifacestate/ifacerepo" 35 "github.com/snapcore/snapd/overlord/state" 36 "github.com/snapcore/snapd/release" 37 "github.com/snapcore/snapd/snap" 38 ) 39 40 var gateAutoRefreshHookName = "gate-auto-refresh" 41 42 // gateAutoRefreshAction represents the action executed by 43 // snapctl refresh --hold or --proceed and stored in the context of 44 // gate-auto-refresh hook. 45 type GateAutoRefreshAction int 46 47 const ( 48 GateAutoRefreshProceed GateAutoRefreshAction = iota 49 GateAutoRefreshHold 50 ) 51 52 // cumulative hold time for snaps other than self 53 const maxOtherHoldDuration = time.Hour * 48 54 55 var timeNow = func() time.Time { 56 return time.Now() 57 } 58 59 func lastRefreshed(st *state.State, snapName string) (time.Time, error) { 60 var snapst SnapState 61 if err := Get(st, snapName, &snapst); err != nil { 62 return time.Time{}, fmt.Errorf("internal error, cannot get snap %q: %v", snapName, err) 63 } 64 // try to get last refresh time from snapstate, but it may not be present 65 // for snaps installed before the introduction of last-refresh attribute. 66 if snapst.LastRefreshTime != nil { 67 return *snapst.LastRefreshTime, nil 68 } 69 snapInfo, err := snapst.CurrentInfo() 70 if err != nil { 71 return time.Time{}, err 72 } 73 // fall back to the modification time of .snap blob file as it's the best 74 // approximation of last refresh time. 75 fst, err := os.Stat(snapInfo.MountFile()) 76 if err != nil { 77 return time.Time{}, err 78 } 79 return fst.ModTime(), nil 80 } 81 82 type holdState struct { 83 // FirstHeld keeps the time when the given snap was first held for refresh by a gating snap. 84 FirstHeld time.Time `json:"first-held"` 85 // HoldUntil stores the desired end time for holding. 86 HoldUntil time.Time `json:"hold-until"` 87 } 88 89 func refreshGating(st *state.State) (map[string]map[string]*holdState, error) { 90 // held snaps -> holding snap(s) -> first-held/hold-until time 91 var gating map[string]map[string]*holdState 92 err := st.Get("snaps-hold", &gating) 93 if err != nil && err != state.ErrNoState { 94 return nil, fmt.Errorf("internal error: cannot get snaps-hold: %v", err) 95 } 96 if err == state.ErrNoState { 97 return make(map[string]map[string]*holdState), nil 98 } 99 return gating, nil 100 } 101 102 // HoldDurationError contains the that error prevents requested hold, along with 103 // hold time that's left (if any). 104 type HoldDurationError struct { 105 Err error 106 DurationLeft time.Duration 107 } 108 109 func (h *HoldDurationError) Error() string { 110 return h.Err.Error() 111 } 112 113 // HoldError contains the details of snaps that cannot to be held. 114 type HoldError struct { 115 SnapsInError map[string]HoldDurationError 116 } 117 118 func (h *HoldError) Error() string { 119 l := []string{""} 120 for _, e := range h.SnapsInError { 121 l = append(l, e.Error()) 122 } 123 return fmt.Sprintf("cannot hold some snaps:%s", strings.Join(l, "\n - ")) 124 } 125 126 func maxAllowedPostponement(gatingSnap, affectedSnap string, maxPostponement time.Duration) time.Duration { 127 if affectedSnap == gatingSnap { 128 return maxPostponement 129 } 130 return maxOtherHoldDuration 131 } 132 133 // holdDurationLeft computes the maximum duration that's left for holding a refresh 134 // given current time, last refresh time, time when snap was first held, maximum 135 // duration allowed for the given snap and maximum overall postponement allowed by 136 // snapd. 137 func holdDurationLeft(now time.Time, lastRefresh, firstHeld time.Time, maxDuration, maxPostponement time.Duration) time.Duration { 138 d1 := firstHeld.Add(maxDuration).Sub(now) 139 d2 := lastRefresh.Add(maxPostponement).Sub(now) 140 if d1 < d2 { 141 return d1 142 } 143 return d2 144 } 145 146 // HoldRefresh marks affectingSnaps as held for refresh for up to holdTime. 147 // HoldTime of zero denotes maximum allowed hold time. 148 // Holding may fail for only some snaps in which case HoldError is returned and 149 // it contains the details of failed ones. 150 func HoldRefresh(st *state.State, gatingSnap string, holdDuration time.Duration, affectingSnaps ...string) error { 151 gating, err := refreshGating(st) 152 if err != nil { 153 return err 154 } 155 herr := &HoldError{ 156 SnapsInError: make(map[string]HoldDurationError), 157 } 158 now := timeNow() 159 for _, heldSnap := range affectingSnaps { 160 hold, ok := gating[heldSnap][gatingSnap] 161 if !ok { 162 hold = &holdState{ 163 FirstHeld: now, 164 } 165 } 166 167 lastRefreshTime, err := lastRefreshed(st, heldSnap) 168 if err != nil { 169 return err 170 } 171 172 mp := maxPostponement - maxPostponementBuffer 173 maxDur := maxAllowedPostponement(gatingSnap, heldSnap, mp) 174 175 // calculate max hold duration that's left considering previous hold 176 // requests of this snap and last refresh time. 177 left := holdDurationLeft(now, lastRefreshTime, hold.FirstHeld, maxDur, mp) 178 if left <= 0 { 179 herr.SnapsInError[heldSnap] = HoldDurationError{ 180 Err: fmt.Errorf("snap %q cannot hold snap %q anymore, maximum refresh postponement exceeded", gatingSnap, heldSnap), 181 } 182 continue 183 } 184 185 dur := holdDuration 186 if dur == 0 { 187 // duration not specified, using a default one (maximum) or what's 188 // left of it. 189 dur = left 190 } else { 191 // explicit hold duration requested 192 if dur > maxDur { 193 herr.SnapsInError[heldSnap] = HoldDurationError{ 194 Err: fmt.Errorf("requested holding duration for snap %q of %s by snap %q exceeds maximum holding time", heldSnap, holdDuration, gatingSnap), 195 DurationLeft: left, 196 } 197 continue 198 } 199 } 200 201 newHold := now.Add(dur) 202 cutOff := lastRefreshTime.Add(maxPostponement - maxPostponementBuffer) 203 204 // consider last refresh time and adjust hold duration if needed so it's 205 // not exceeded. 206 if newHold.Before(cutOff) { 207 hold.HoldUntil = newHold 208 } else { 209 hold.HoldUntil = cutOff 210 } 211 212 // finally store/update gating hold data 213 if _, ok := gating[heldSnap]; !ok { 214 gating[heldSnap] = make(map[string]*holdState) 215 } 216 gating[heldSnap][gatingSnap] = hold 217 } 218 219 if len(herr.SnapsInError) > 0 { 220 // if some of the affecting snaps couldn't be held anymore then it 221 // doesn't make sense to hold other affecting snaps (because the gating 222 // snap is going to be disrupted anyway); go over all affectingSnaps 223 // again and remove gating info for them - this also deletes old holdings 224 // (if the hook run on previous refresh attempt) therefore we need to 225 // update snaps-hold state below. 226 for _, heldSnap := range affectingSnaps { 227 delete(gating[heldSnap], gatingSnap) 228 } 229 } 230 st.Set("snaps-hold", gating) 231 if len(herr.SnapsInError) > 0 { 232 return herr 233 } 234 return nil 235 } 236 237 // ProceedWithRefresh unblocks all snaps held by gatingSnap for refresh. This 238 // should be called for --proceed on the gatingSnap. 239 func ProceedWithRefresh(st *state.State, gatingSnap string) error { 240 gating, err := refreshGating(st) 241 if err != nil { 242 return err 243 } 244 if len(gating) == 0 { 245 return nil 246 } 247 248 var changed bool 249 for heldSnap, gatingSnaps := range gating { 250 if _, ok := gatingSnaps[gatingSnap]; ok { 251 delete(gatingSnaps, gatingSnap) 252 changed = true 253 } 254 if len(gatingSnaps) == 0 { 255 delete(gating, heldSnap) 256 } 257 } 258 259 if changed { 260 st.Set("snaps-hold", gating) 261 } 262 return nil 263 } 264 265 // pruneGating removes affecting snaps that are not in candidates (meaning 266 // there is no update for them anymore). 267 func pruneGating(st *state.State, candidates map[string]*refreshCandidate) error { 268 gating, err := refreshGating(st) 269 if err != nil { 270 return err 271 } 272 273 if len(gating) == 0 { 274 return nil 275 } 276 277 var changed bool 278 for affectingSnap := range gating { 279 if candidates[affectingSnap] == nil { 280 // the snap doesn't have an update anymore, forget it 281 delete(gating, affectingSnap) 282 changed = true 283 } 284 } 285 if changed { 286 st.Set("snaps-hold", gating) 287 } 288 return nil 289 } 290 291 // resetGatingForRefreshed resets gating information by removing refreshedSnaps 292 // (they are not held anymore). This should be called for snaps about to be 293 // refreshed. 294 func resetGatingForRefreshed(st *state.State, refreshedSnaps ...string) error { 295 gating, err := refreshGating(st) 296 if err != nil { 297 return err 298 } 299 if len(gating) == 0 { 300 return nil 301 } 302 303 var changed bool 304 for _, snapName := range refreshedSnaps { 305 if _, ok := gating[snapName]; ok { 306 delete(gating, snapName) 307 changed = true 308 } 309 } 310 311 if changed { 312 st.Set("snaps-hold", gating) 313 } 314 return nil 315 } 316 317 // pruneSnapsHold removes the given snap from snaps-hold, whether it was an 318 // affecting snap or gating snap. This should be called when a snap gets 319 // removed. 320 func pruneSnapsHold(st *state.State, snapName string) error { 321 gating, err := refreshGating(st) 322 if err != nil { 323 return err 324 } 325 if len(gating) == 0 { 326 return nil 327 } 328 329 var changed bool 330 331 if _, ok := gating[snapName]; ok { 332 delete(gating, snapName) 333 changed = true 334 } 335 336 for heldSnap, holdingSnaps := range gating { 337 if _, ok := holdingSnaps[snapName]; ok { 338 delete(holdingSnaps, snapName) 339 if len(holdingSnaps) == 0 { 340 delete(gating, heldSnap) 341 } 342 changed = true 343 } 344 } 345 346 if changed { 347 st.Set("snaps-hold", gating) 348 } 349 350 return nil 351 } 352 353 // heldSnaps returns all snaps that are gated and shouldn't be refreshed. 354 func heldSnaps(st *state.State) (map[string]bool, error) { 355 gating, err := refreshGating(st) 356 if err != nil { 357 return nil, err 358 } 359 if len(gating) == 0 { 360 return nil, nil 361 } 362 363 now := timeNow() 364 365 held := make(map[string]bool) 366 Loop: 367 for heldSnap, holdingSnaps := range gating { 368 refreshed, err := lastRefreshed(st, heldSnap) 369 if err != nil { 370 return nil, err 371 } 372 // make sure we don't hold any snap for more than maxPostponement 373 if refreshed.Add(maxPostponement).Before(now) { 374 continue 375 } 376 for _, hold := range holdingSnaps { 377 if hold.HoldUntil.Before(now) { 378 continue 379 } 380 held[heldSnap] = true 381 continue Loop 382 } 383 } 384 return held, nil 385 } 386 387 type AffectedSnapInfo struct { 388 Restart bool 389 Base bool 390 AffectingSnaps map[string]bool 391 } 392 393 // AffectedByRefreshCandidates returns information about all snaps affected by 394 // current refresh-candidates in the state. 395 func AffectedByRefreshCandidates(st *state.State) (map[string]*AffectedSnapInfo, error) { 396 // we care only about the keys so this can use 397 // *json.RawMessage instead of refreshCandidates 398 var candidates map[string]*json.RawMessage 399 if err := st.Get("refresh-candidates", &candidates); err != nil && err != state.ErrNoState { 400 return nil, err 401 } 402 403 snaps := make([]string, 0, len(candidates)) 404 for cand := range candidates { 405 snaps = append(snaps, cand) 406 } 407 affected, err := affectedByRefresh(st, snaps) 408 return affected, err 409 } 410 411 func affectedByRefresh(st *state.State, updates []string) (map[string]*AffectedSnapInfo, error) { 412 allSnaps, err := All(st) 413 if err != nil { 414 return nil, err 415 } 416 snapsWithHook := make(map[string]*SnapState) 417 418 var bootBase string 419 if !release.OnClassic { 420 deviceCtx, err := DeviceCtx(st, nil, nil) 421 if err != nil { 422 return nil, fmt.Errorf("cannot get device context: %v", err) 423 } 424 bootBaseInfo, err := BootBaseInfo(st, deviceCtx) 425 if err != nil { 426 return nil, fmt.Errorf("cannot get boot base info: %v", err) 427 } 428 bootBase = bootBaseInfo.InstanceName() 429 } 430 431 byBase := make(map[string][]string) 432 for name, snapSt := range allSnaps { 433 if !snapSt.Active { 434 delete(allSnaps, name) 435 continue 436 } 437 inf, err := snapSt.CurrentInfo() 438 if err != nil { 439 return nil, err 440 } 441 // optimization: do not consider snaps that don't have gate-auto-refresh hook. 442 if inf.Hooks[gateAutoRefreshHookName] == nil { 443 continue 444 } 445 snapsWithHook[name] = snapSt 446 447 base := inf.Base 448 if base == "none" { 449 continue 450 } 451 if inf.Base == "" { 452 base = "core" 453 } 454 byBase[base] = append(byBase[base], snapSt.InstanceName()) 455 } 456 457 affected := make(map[string]*AffectedSnapInfo) 458 459 addAffected := func(snapName, affectedBy string, restart bool, base bool) { 460 if affected[snapName] == nil { 461 affected[snapName] = &AffectedSnapInfo{ 462 AffectingSnaps: map[string]bool{}, 463 } 464 } 465 affectedInfo := affected[snapName] 466 if restart { 467 affectedInfo.Restart = restart 468 } 469 if base { 470 affectedInfo.Base = base 471 } 472 affectedInfo.AffectingSnaps[affectedBy] = true 473 } 474 475 for _, snapName := range updates { 476 snapSt := allSnaps[snapName] 477 if snapSt == nil { 478 // this could happen if an update for inactive snap was requested (those 479 // are filtered out above). 480 return nil, fmt.Errorf("internal error: no state for snap %q", snapName) 481 } 482 up, err := snapSt.CurrentInfo() 483 if err != nil { 484 return nil, err 485 } 486 487 // the snap affects itself (as long as it has the hook) 488 if snapSt := snapsWithHook[up.InstanceName()]; snapSt != nil { 489 addAffected(up.InstanceName(), up.InstanceName(), false, false) 490 } 491 492 // on core system, affected by update of boot base 493 if bootBase != "" && up.InstanceName() == bootBase { 494 for _, snapSt := range snapsWithHook { 495 addAffected(snapSt.InstanceName(), up.InstanceName(), true, false) 496 } 497 } 498 499 // snaps that can trigger reboot 500 // XXX: gadget refresh doesn't always require reboot, refine this 501 if up.Type() == snap.TypeKernel || up.Type() == snap.TypeGadget { 502 for _, snapSt := range snapsWithHook { 503 addAffected(snapSt.InstanceName(), up.InstanceName(), true, false) 504 } 505 continue 506 } 507 if up.Type() == snap.TypeBase || up.SnapName() == "core" { 508 // affected by refresh of this base snap 509 for _, snapName := range byBase[up.InstanceName()] { 510 addAffected(snapName, up.InstanceName(), false, true) 511 } 512 } 513 514 repo := ifacerepo.Get(st) 515 516 // consider slots provided by refreshed snap, but exclude core and snapd 517 // since they provide system-level slots that are generally not disrupted 518 // by snap updates. 519 if up.SnapType != snap.TypeSnapd && up.SnapName() != "core" { 520 for _, slotInfo := range up.Slots { 521 conns, err := repo.Connected(up.InstanceName(), slotInfo.Name) 522 if err != nil { 523 return nil, err 524 } 525 for _, cref := range conns { 526 // affected only if it wasn't optimized out above 527 if snapsWithHook[cref.PlugRef.Snap] != nil { 528 addAffected(cref.PlugRef.Snap, up.InstanceName(), true, false) 529 } 530 } 531 } 532 } 533 534 // consider mount backend plugs/slots; 535 // for slot side only consider snapd/core because they are ignored by the 536 // earlier loop around slots. 537 if up.SnapType == snap.TypeSnapd || up.SnapType == snap.TypeOS { 538 for _, slotInfo := range up.Slots { 539 iface := repo.Interface(slotInfo.Interface) 540 if iface == nil { 541 return nil, fmt.Errorf("internal error: unknown interface %s", slotInfo.Interface) 542 } 543 if !usesMountBackend(iface) { 544 continue 545 } 546 conns, err := repo.Connected(up.InstanceName(), slotInfo.Name) 547 if err != nil { 548 return nil, err 549 } 550 for _, cref := range conns { 551 if snapsWithHook[cref.PlugRef.Snap] != nil { 552 addAffected(cref.PlugRef.Snap, up.InstanceName(), true, false) 553 } 554 } 555 } 556 } 557 for _, plugInfo := range up.Plugs { 558 iface := repo.Interface(plugInfo.Interface) 559 if iface == nil { 560 return nil, fmt.Errorf("internal error: unknown interface %s", plugInfo.Interface) 561 } 562 if !usesMountBackend(iface) { 563 continue 564 } 565 conns, err := repo.Connected(up.InstanceName(), plugInfo.Name) 566 if err != nil { 567 return nil, err 568 } 569 for _, cref := range conns { 570 if snapsWithHook[cref.SlotRef.Snap] != nil { 571 addAffected(cref.SlotRef.Snap, up.InstanceName(), true, false) 572 } 573 } 574 } 575 } 576 577 return affected, nil 578 } 579 580 // XXX: this is too wide and affects all commonInterface-based interfaces; we 581 // need metadata on the relevant interfaces. 582 func usesMountBackend(iface interfaces.Interface) bool { 583 type definer1 interface { 584 MountConnectedSlot(*mount.Specification, *interfaces.ConnectedPlug, *interfaces.ConnectedSlot) error 585 } 586 type definer2 interface { 587 MountConnectedPlug(*mount.Specification, *interfaces.ConnectedPlug, *interfaces.ConnectedSlot) error 588 } 589 type definer3 interface { 590 MountPermanentPlug(*mount.Specification, *snap.PlugInfo) error 591 } 592 type definer4 interface { 593 MountPermanentSlot(*mount.Specification, *snap.SlotInfo) error 594 } 595 596 if _, ok := iface.(definer1); ok { 597 return true 598 } 599 if _, ok := iface.(definer2); ok { 600 return true 601 } 602 if _, ok := iface.(definer3); ok { 603 return true 604 } 605 if _, ok := iface.(definer4); ok { 606 return true 607 } 608 return false 609 } 610 611 // createGateAutoRefreshHooks creates gate-auto-refresh hooks for all affectedSnaps. 612 // The hooks will have their context data set from affectedSnapInfo flags (base, restart). 613 // Hook tasks will be chained to run sequentially. 614 func createGateAutoRefreshHooks(st *state.State, affectedSnaps map[string]*AffectedSnapInfo) *state.TaskSet { 615 ts := state.NewTaskSet() 616 var prev *state.Task 617 // sort names for easy testing 618 names := make([]string, 0, len(affectedSnaps)) 619 for snapName := range affectedSnaps { 620 names = append(names, snapName) 621 } 622 sort.Strings(names) 623 for _, snapName := range names { 624 affected := affectedSnaps[snapName] 625 hookTask := SetupGateAutoRefreshHook(st, snapName, affected.Base, affected.Restart, affected.AffectingSnaps) 626 // XXX: it should be fine to run the hooks in parallel 627 if prev != nil { 628 hookTask.WaitFor(prev) 629 } 630 ts.AddTask(hookTask) 631 prev = hookTask 632 } 633 return ts 634 } 635 636 func conditionalAutoRefreshAffectedSnaps(t *state.Task) ([]string, error) { 637 var snaps map[string]*json.RawMessage 638 if err := t.Get("snaps", &snaps); err != nil { 639 return nil, fmt.Errorf("internal error: cannot get snaps to update for %s task %s", t.Kind(), t.ID()) 640 } 641 names := make([]string, 0, len(snaps)) 642 for sn := range snaps { 643 // TODO: drop snaps once we know the outcome of gate-auto-refresh hooks. 644 names = append(names, sn) 645 } 646 return names, nil 647 } 648 649 // snapsToRefresh returns all snaps that should proceed with refresh considering 650 // hold behavior. 651 var snapsToRefresh = func(gatingTask *state.Task) ([]*refreshCandidate, error) { 652 var snaps map[string]*refreshCandidate 653 if err := gatingTask.Get("snaps", &snaps); err != nil { 654 return nil, err 655 } 656 657 held, err := heldSnaps(gatingTask.State()) 658 if err != nil { 659 return nil, err 660 } 661 662 var skipped []string 663 var candidates []*refreshCandidate 664 for _, s := range snaps { 665 if !held[s.InstanceName()] { 666 candidates = append(candidates, s) 667 } else { 668 skipped = append(skipped, s.InstanceName()) 669 } 670 } 671 672 if len(skipped) > 0 { 673 sort.Strings(skipped) 674 logger.Noticef("skipping refresh of held snaps: %s", strings.Join(skipped, ",")) 675 } 676 677 return candidates, nil 678 } 679 680 // AutoRefreshForGatingSnap triggers an auto-refresh change for all 681 // snaps held by the given gating snap. This should only be called if the 682 // gate-auto-refresh-hook feature is enabled. 683 // TODO: this should be restricted as it doesn't take refresh timer/refresh hold 684 // into account. 685 func AutoRefreshForGatingSnap(st *state.State, gatingSnap string) error { 686 // ensure nothing is in flight already 687 if autoRefreshInFlight(st) { 688 return fmt.Errorf("there is an auto-refresh in progress") 689 } 690 691 gating, err := refreshGating(st) 692 if err != nil { 693 return err 694 } 695 696 var hasHeld bool 697 for _, holdingSnaps := range gating { 698 if _, ok := holdingSnaps[gatingSnap]; ok { 699 hasHeld = true 700 break 701 } 702 } 703 if !hasHeld { 704 return fmt.Errorf("no snaps are held by snap %q", gatingSnap) 705 } 706 707 // NOTE: this will unlock and re-lock state for network ops 708 // XXX: should we refresh assertions (just call AutoRefresh()?) 709 updated, tasksets, err := autoRefreshPhase1(auth.EnsureContextTODO(), st, gatingSnap) 710 if err != nil { 711 return err 712 } 713 msg := autoRefreshSummary(updated) 714 if msg == "" { 715 logger.Noticef("auto-refresh: all snaps previously held by %q are up-to-date", gatingSnap) 716 return nil 717 } 718 719 // note, we do not update last-refresh timestamp because this auto-refresh 720 // is not treated as a full auto-refresh. 721 722 chg := st.NewChange("auto-refresh", msg) 723 for _, ts := range tasksets { 724 chg.AddAll(ts) 725 } 726 chg.Set("snap-names", updated) 727 chg.Set("api-data", map[string]interface{}{"snap-names": updated}) 728 729 return nil 730 }