github.com/anonymouse64/snapd@v0.0.0-20210824153203-04c4c42d842d/overlord/snapstate/autorefresh_gating.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2021 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package snapstate 21 22 import ( 23 "encoding/json" 24 "fmt" 25 "os" 26 "sort" 27 "strings" 28 "time" 29 30 "github.com/snapcore/snapd/interfaces" 31 "github.com/snapcore/snapd/interfaces/mount" 32 "github.com/snapcore/snapd/logger" 33 "github.com/snapcore/snapd/overlord/auth" 34 "github.com/snapcore/snapd/overlord/ifacestate/ifacerepo" 35 "github.com/snapcore/snapd/overlord/state" 36 "github.com/snapcore/snapd/release" 37 "github.com/snapcore/snapd/snap" 38 ) 39 40 var gateAutoRefreshHookName = "gate-auto-refresh" 41 42 // GateAutoRefreshAction represents the action executed by 43 // snapctl refresh --hold or --proceed and stored in the context of 44 // gate-auto-refresh hook. 
45 type GateAutoRefreshAction int 46 47 const ( 48 GateAutoRefreshProceed GateAutoRefreshAction = iota 49 GateAutoRefreshHold 50 ) 51 52 // cumulative hold time for snaps other than self 53 const maxOtherHoldDuration = time.Hour * 48 54 55 var timeNow = func() time.Time { 56 return time.Now() 57 } 58 59 func lastRefreshed(st *state.State, snapName string) (time.Time, error) { 60 var snapst SnapState 61 if err := Get(st, snapName, &snapst); err != nil { 62 return time.Time{}, fmt.Errorf("internal error, cannot get snap %q: %v", snapName, err) 63 } 64 // try to get last refresh time from snapstate, but it may not be present 65 // for snaps installed before the introduction of last-refresh attribute. 66 if snapst.LastRefreshTime != nil { 67 return *snapst.LastRefreshTime, nil 68 } 69 snapInfo, err := snapst.CurrentInfo() 70 if err != nil { 71 return time.Time{}, err 72 } 73 // fall back to the modification time of .snap blob file as it's the best 74 // approximation of last refresh time. 75 fst, err := os.Stat(snapInfo.MountFile()) 76 if err != nil { 77 return time.Time{}, err 78 } 79 return fst.ModTime(), nil 80 } 81 82 type holdState struct { 83 // FirstHeld keeps the time when the given snap was first held for refresh by a gating snap. 84 FirstHeld time.Time `json:"first-held"` 85 // HoldUntil stores the desired end time for holding. 
86 HoldUntil time.Time `json:"hold-until"` 87 } 88 89 func refreshGating(st *state.State) (map[string]map[string]*holdState, error) { 90 // held snaps -> holding snap(s) -> first-held/hold-until time 91 var gating map[string]map[string]*holdState 92 err := st.Get("snaps-hold", &gating) 93 if err != nil && err != state.ErrNoState { 94 return nil, fmt.Errorf("internal error: cannot get snaps-hold: %v", err) 95 } 96 if err == state.ErrNoState { 97 return make(map[string]map[string]*holdState), nil 98 } 99 return gating, nil 100 } 101 102 // HoldDurationError contains the that error prevents requested hold, along with 103 // hold time that's left (if any). 104 type HoldDurationError struct { 105 Err error 106 DurationLeft time.Duration 107 } 108 109 func (h *HoldDurationError) Error() string { 110 return h.Err.Error() 111 } 112 113 // HoldError contains the details of snaps that cannot to be held. 114 type HoldError struct { 115 SnapsInError map[string]HoldDurationError 116 } 117 118 func (h *HoldError) Error() string { 119 l := []string{""} 120 for _, e := range h.SnapsInError { 121 l = append(l, e.Error()) 122 } 123 return fmt.Sprintf("cannot hold some snaps:%s", strings.Join(l, "\n - ")) 124 } 125 126 func maxAllowedPostponement(gatingSnap, affectedSnap string, maxPostponement time.Duration) time.Duration { 127 if affectedSnap == gatingSnap { 128 return maxPostponement 129 } 130 return maxOtherHoldDuration 131 } 132 133 // holdDurationLeft computes the maximum duration that's left for holding a refresh 134 // given current time, last refresh time, time when snap was first held, maximum 135 // duration allowed for the given snap and maximum overall postponement allowed by 136 // snapd. 
137 func holdDurationLeft(now time.Time, lastRefresh, firstHeld time.Time, maxDuration, maxPostponement time.Duration) time.Duration { 138 d1 := firstHeld.Add(maxDuration).Sub(now) 139 d2 := lastRefresh.Add(maxPostponement).Sub(now) 140 if d1 < d2 { 141 return d1 142 } 143 return d2 144 } 145 146 // HoldRefresh marks affectingSnaps as held for refresh for up to holdTime. 147 // HoldTime of zero denotes maximum allowed hold time. 148 // Holding may fail for only some snaps in which case HoldError is returned and 149 // it contains the details of failed ones. 150 func HoldRefresh(st *state.State, gatingSnap string, holdDuration time.Duration, affectingSnaps ...string) error { 151 gating, err := refreshGating(st) 152 if err != nil { 153 return err 154 } 155 herr := &HoldError{ 156 SnapsInError: make(map[string]HoldDurationError), 157 } 158 now := timeNow() 159 for _, heldSnap := range affectingSnaps { 160 hold, ok := gating[heldSnap][gatingSnap] 161 if !ok { 162 hold = &holdState{ 163 FirstHeld: now, 164 } 165 } 166 167 lastRefreshTime, err := lastRefreshed(st, heldSnap) 168 if err != nil { 169 return err 170 } 171 172 mp := maxPostponement - maxPostponementBuffer 173 maxDur := maxAllowedPostponement(gatingSnap, heldSnap, mp) 174 175 // calculate max hold duration that's left considering previous hold 176 // requests of this snap and last refresh time. 177 left := holdDurationLeft(now, lastRefreshTime, hold.FirstHeld, maxDur, mp) 178 if left <= 0 { 179 herr.SnapsInError[heldSnap] = HoldDurationError{ 180 Err: fmt.Errorf("snap %q cannot hold snap %q anymore, maximum refresh postponement exceeded", gatingSnap, heldSnap), 181 } 182 continue 183 } 184 185 dur := holdDuration 186 if dur == 0 { 187 // duration not specified, using a default one (maximum) or what's 188 // left of it. 
189 dur = left 190 } else { 191 // explicit hold duration requested 192 if dur > maxDur { 193 herr.SnapsInError[heldSnap] = HoldDurationError{ 194 Err: fmt.Errorf("requested holding duration for snap %q of %s by snap %q exceeds maximum holding time", heldSnap, holdDuration, gatingSnap), 195 DurationLeft: left, 196 } 197 continue 198 } 199 } 200 201 newHold := now.Add(dur) 202 cutOff := lastRefreshTime.Add(maxPostponement - maxPostponementBuffer) 203 204 // consider last refresh time and adjust hold duration if needed so it's 205 // not exceeded. 206 if newHold.Before(cutOff) { 207 hold.HoldUntil = newHold 208 } else { 209 hold.HoldUntil = cutOff 210 } 211 212 // finally store/update gating hold data 213 if _, ok := gating[heldSnap]; !ok { 214 gating[heldSnap] = make(map[string]*holdState) 215 } 216 gating[heldSnap][gatingSnap] = hold 217 } 218 219 if len(herr.SnapsInError) != len(affectingSnaps) { 220 st.Set("snaps-hold", gating) 221 } 222 if len(herr.SnapsInError) > 0 { 223 return herr 224 } 225 return nil 226 } 227 228 // ProceedWithRefresh unblocks all snaps held by gatingSnap for refresh. This 229 // should be called for --proceed on the gatingSnap. 230 func ProceedWithRefresh(st *state.State, gatingSnap string) error { 231 gating, err := refreshGating(st) 232 if err != nil { 233 return err 234 } 235 if len(gating) == 0 { 236 return nil 237 } 238 239 var changed bool 240 for heldSnap, gatingSnaps := range gating { 241 if _, ok := gatingSnaps[gatingSnap]; ok { 242 delete(gatingSnaps, gatingSnap) 243 changed = true 244 } 245 if len(gatingSnaps) == 0 { 246 delete(gating, heldSnap) 247 } 248 } 249 250 if changed { 251 st.Set("snaps-hold", gating) 252 } 253 return nil 254 } 255 256 // pruneGating removes affecting snaps that are not in candidates (meaning 257 // there is no update for them anymore). 
258 func pruneGating(st *state.State, candidates map[string]*refreshCandidate) error { 259 gating, err := refreshGating(st) 260 if err != nil { 261 return err 262 } 263 264 if len(gating) == 0 { 265 return nil 266 } 267 268 var changed bool 269 for affectingSnap := range gating { 270 if candidates[affectingSnap] == nil { 271 // the snap doesn't have an update anymore, forget it 272 delete(gating, affectingSnap) 273 changed = true 274 } 275 } 276 if changed { 277 st.Set("snaps-hold", gating) 278 } 279 return nil 280 } 281 282 // resetGatingForRefreshed resets gating information by removing refreshedSnaps 283 // (they are not held anymore). This should be called for snaps about to be 284 // refreshed. 285 func resetGatingForRefreshed(st *state.State, refreshedSnaps ...string) error { 286 gating, err := refreshGating(st) 287 if err != nil { 288 return err 289 } 290 if len(gating) == 0 { 291 return nil 292 } 293 294 var changed bool 295 for _, snapName := range refreshedSnaps { 296 if _, ok := gating[snapName]; ok { 297 delete(gating, snapName) 298 changed = true 299 } 300 } 301 302 if changed { 303 st.Set("snaps-hold", gating) 304 } 305 return nil 306 } 307 308 // pruneSnapsHold removes the given snap from snaps-hold, whether it was an 309 // affecting snap or gating snap. This should be called when a snap gets 310 // removed. 
311 func pruneSnapsHold(st *state.State, snapName string) error { 312 gating, err := refreshGating(st) 313 if err != nil { 314 return err 315 } 316 if len(gating) == 0 { 317 return nil 318 } 319 320 var changed bool 321 322 if _, ok := gating[snapName]; ok { 323 delete(gating, snapName) 324 changed = true 325 } 326 327 for heldSnap, holdingSnaps := range gating { 328 if _, ok := holdingSnaps[snapName]; ok { 329 delete(holdingSnaps, snapName) 330 if len(holdingSnaps) == 0 { 331 delete(gating, heldSnap) 332 } 333 changed = true 334 } 335 } 336 337 if changed { 338 st.Set("snaps-hold", gating) 339 } 340 341 return nil 342 } 343 344 // heldSnaps returns all snaps that are gated and shouldn't be refreshed. 345 func heldSnaps(st *state.State) (map[string]bool, error) { 346 gating, err := refreshGating(st) 347 if err != nil { 348 return nil, err 349 } 350 if len(gating) == 0 { 351 return nil, nil 352 } 353 354 now := timeNow() 355 356 held := make(map[string]bool) 357 Loop: 358 for heldSnap, holdingSnaps := range gating { 359 refreshed, err := lastRefreshed(st, heldSnap) 360 if err != nil { 361 return nil, err 362 } 363 // make sure we don't hold any snap for more than maxPostponement 364 if refreshed.Add(maxPostponement).Before(now) { 365 continue 366 } 367 for _, hold := range holdingSnaps { 368 if hold.HoldUntil.Before(now) { 369 continue 370 } 371 held[heldSnap] = true 372 continue Loop 373 } 374 } 375 return held, nil 376 } 377 378 type AffectedSnapInfo struct { 379 Restart bool 380 Base bool 381 AffectingSnaps map[string]bool 382 } 383 384 // AffectedByRefreshCandidates returns information about all snaps affected by 385 // current refresh-candidates in the state. 
386 func AffectedByRefreshCandidates(st *state.State) (map[string]*AffectedSnapInfo, error) { 387 // we care only about the keys so this can use 388 // *json.RawMessage instead of refreshCandidates 389 var candidates map[string]*json.RawMessage 390 if err := st.Get("refresh-candidates", &candidates); err != nil && err != state.ErrNoState { 391 return nil, err 392 } 393 394 snaps := make([]string, 0, len(candidates)) 395 for cand := range candidates { 396 snaps = append(snaps, cand) 397 } 398 affected, err := affectedByRefresh(st, snaps) 399 return affected, err 400 } 401 402 func affectedByRefresh(st *state.State, updates []string) (map[string]*AffectedSnapInfo, error) { 403 allSnaps, err := All(st) 404 if err != nil { 405 return nil, err 406 } 407 snapsWithHook := make(map[string]*SnapState) 408 409 var bootBase string 410 if !release.OnClassic { 411 deviceCtx, err := DeviceCtx(st, nil, nil) 412 if err != nil { 413 return nil, fmt.Errorf("cannot get device context: %v", err) 414 } 415 bootBaseInfo, err := BootBaseInfo(st, deviceCtx) 416 if err != nil { 417 return nil, fmt.Errorf("cannot get boot base info: %v", err) 418 } 419 bootBase = bootBaseInfo.InstanceName() 420 } 421 422 byBase := make(map[string][]string) 423 for name, snapSt := range allSnaps { 424 if !snapSt.Active { 425 delete(allSnaps, name) 426 continue 427 } 428 inf, err := snapSt.CurrentInfo() 429 if err != nil { 430 return nil, err 431 } 432 // optimization: do not consider snaps that don't have gate-auto-refresh hook. 
433 if inf.Hooks[gateAutoRefreshHookName] == nil { 434 continue 435 } 436 snapsWithHook[name] = snapSt 437 438 base := inf.Base 439 if base == "none" { 440 continue 441 } 442 if inf.Base == "" { 443 base = "core" 444 } 445 byBase[base] = append(byBase[base], snapSt.InstanceName()) 446 } 447 448 affected := make(map[string]*AffectedSnapInfo) 449 450 addAffected := func(snapName, affectedBy string, restart bool, base bool) { 451 if affected[snapName] == nil { 452 affected[snapName] = &AffectedSnapInfo{ 453 AffectingSnaps: map[string]bool{}, 454 } 455 } 456 affectedInfo := affected[snapName] 457 if restart { 458 affectedInfo.Restart = restart 459 } 460 if base { 461 affectedInfo.Base = base 462 } 463 affectedInfo.AffectingSnaps[affectedBy] = true 464 } 465 466 for _, snapName := range updates { 467 snapSt := allSnaps[snapName] 468 if snapSt == nil { 469 // this could happen if an update for inactive snap was requested (those 470 // are filtered out above). 471 return nil, fmt.Errorf("internal error: no state for snap %q", snapName) 472 } 473 up, err := snapSt.CurrentInfo() 474 if err != nil { 475 return nil, err 476 } 477 478 // the snap affects itself (as long as it has the hook) 479 if snapSt := snapsWithHook[up.InstanceName()]; snapSt != nil { 480 addAffected(up.InstanceName(), up.InstanceName(), false, false) 481 } 482 483 // on core system, affected by update of boot base 484 if bootBase != "" && up.InstanceName() == bootBase { 485 for _, snapSt := range snapsWithHook { 486 addAffected(snapSt.InstanceName(), up.InstanceName(), true, false) 487 } 488 } 489 490 // snaps that can trigger reboot 491 // XXX: gadget refresh doesn't always require reboot, refine this 492 if up.Type() == snap.TypeKernel || up.Type() == snap.TypeGadget { 493 for _, snapSt := range snapsWithHook { 494 addAffected(snapSt.InstanceName(), up.InstanceName(), true, false) 495 } 496 continue 497 } 498 if up.Type() == snap.TypeBase || up.SnapName() == "core" { 499 // affected by refresh of this 
base snap 500 for _, snapName := range byBase[up.InstanceName()] { 501 addAffected(snapName, up.InstanceName(), false, true) 502 } 503 } 504 505 repo := ifacerepo.Get(st) 506 507 // consider slots provided by refreshed snap, but exclude core and snapd 508 // since they provide system-level slots that are generally not disrupted 509 // by snap updates. 510 if up.SnapType != snap.TypeSnapd && up.SnapName() != "core" { 511 for _, slotInfo := range up.Slots { 512 conns, err := repo.Connected(up.InstanceName(), slotInfo.Name) 513 if err != nil { 514 return nil, err 515 } 516 for _, cref := range conns { 517 // affected only if it wasn't optimized out above 518 if snapsWithHook[cref.PlugRef.Snap] != nil { 519 addAffected(cref.PlugRef.Snap, up.InstanceName(), true, false) 520 } 521 } 522 } 523 } 524 525 // consider mount backend plugs/slots; 526 // for slot side only consider snapd/core because they are ignored by the 527 // earlier loop around slots. 528 if up.SnapType == snap.TypeSnapd || up.SnapType == snap.TypeOS { 529 for _, slotInfo := range up.Slots { 530 iface := repo.Interface(slotInfo.Interface) 531 if iface == nil { 532 return nil, fmt.Errorf("internal error: unknown interface %s", slotInfo.Interface) 533 } 534 if !usesMountBackend(iface) { 535 continue 536 } 537 conns, err := repo.Connected(up.InstanceName(), slotInfo.Name) 538 if err != nil { 539 return nil, err 540 } 541 for _, cref := range conns { 542 if snapsWithHook[cref.PlugRef.Snap] != nil { 543 addAffected(cref.PlugRef.Snap, up.InstanceName(), true, false) 544 } 545 } 546 } 547 } 548 for _, plugInfo := range up.Plugs { 549 iface := repo.Interface(plugInfo.Interface) 550 if iface == nil { 551 return nil, fmt.Errorf("internal error: unknown interface %s", plugInfo.Interface) 552 } 553 if !usesMountBackend(iface) { 554 continue 555 } 556 conns, err := repo.Connected(up.InstanceName(), plugInfo.Name) 557 if err != nil { 558 return nil, err 559 } 560 for _, cref := range conns { 561 if 
snapsWithHook[cref.SlotRef.Snap] != nil { 562 addAffected(cref.SlotRef.Snap, up.InstanceName(), true, false) 563 } 564 } 565 } 566 } 567 568 return affected, nil 569 } 570 571 // XXX: this is too wide and affects all commonInterface-based interfaces; we 572 // need metadata on the relevant interfaces. 573 func usesMountBackend(iface interfaces.Interface) bool { 574 type definer1 interface { 575 MountConnectedSlot(*mount.Specification, *interfaces.ConnectedPlug, *interfaces.ConnectedSlot) error 576 } 577 type definer2 interface { 578 MountConnectedPlug(*mount.Specification, *interfaces.ConnectedPlug, *interfaces.ConnectedSlot) error 579 } 580 type definer3 interface { 581 MountPermanentPlug(*mount.Specification, *snap.PlugInfo) error 582 } 583 type definer4 interface { 584 MountPermanentSlot(*mount.Specification, *snap.SlotInfo) error 585 } 586 587 if _, ok := iface.(definer1); ok { 588 return true 589 } 590 if _, ok := iface.(definer2); ok { 591 return true 592 } 593 if _, ok := iface.(definer3); ok { 594 return true 595 } 596 if _, ok := iface.(definer4); ok { 597 return true 598 } 599 return false 600 } 601 602 // createGateAutoRefreshHooks creates gate-auto-refresh hooks for all affectedSnaps. 603 // The hooks will have their context data set from affectedSnapInfo flags (base, restart). 604 // Hook tasks will be chained to run sequentially. 
605 func createGateAutoRefreshHooks(st *state.State, affectedSnaps map[string]*AffectedSnapInfo) *state.TaskSet { 606 ts := state.NewTaskSet() 607 var prev *state.Task 608 // sort names for easy testing 609 names := make([]string, 0, len(affectedSnaps)) 610 for snapName := range affectedSnaps { 611 names = append(names, snapName) 612 } 613 sort.Strings(names) 614 for _, snapName := range names { 615 affected := affectedSnaps[snapName] 616 hookTask := SetupGateAutoRefreshHook(st, snapName, affected.Base, affected.Restart, affected.AffectingSnaps) 617 // XXX: it should be fine to run the hooks in parallel 618 if prev != nil { 619 hookTask.WaitFor(prev) 620 } 621 ts.AddTask(hookTask) 622 prev = hookTask 623 } 624 return ts 625 } 626 627 func conditionalAutoRefreshAffectedSnaps(t *state.Task) ([]string, error) { 628 var snaps map[string]*json.RawMessage 629 if err := t.Get("snaps", &snaps); err != nil { 630 return nil, fmt.Errorf("internal error: cannot get snaps to update for %s task %s", t.Kind(), t.ID()) 631 } 632 names := make([]string, 0, len(snaps)) 633 for sn := range snaps { 634 // TODO: drop snaps once we know the outcome of gate-auto-refresh hooks. 635 names = append(names, sn) 636 } 637 return names, nil 638 } 639 640 // snapsToRefresh returns all snaps that should proceed with refresh considering 641 // hold behavior. 
642 var snapsToRefresh = func(gatingTask *state.Task) ([]*refreshCandidate, error) { 643 var snaps map[string]*refreshCandidate 644 if err := gatingTask.Get("snaps", &snaps); err != nil { 645 return nil, err 646 } 647 648 held, err := heldSnaps(gatingTask.State()) 649 if err != nil { 650 return nil, err 651 } 652 653 var skipped []string 654 var candidates []*refreshCandidate 655 for _, s := range snaps { 656 if !held[s.InstanceName()] { 657 candidates = append(candidates, s) 658 } else { 659 skipped = append(skipped, s.InstanceName()) 660 } 661 } 662 663 if len(skipped) > 0 { 664 sort.Strings(skipped) 665 logger.Noticef("skipping refresh of held snaps: %s", strings.Join(skipped, ",")) 666 } 667 668 return candidates, nil 669 } 670 671 // AutoRefreshForGatingSnap triggers an auto-refresh change for all 672 // snaps held by the given gating snap. This should only be called if the 673 // gate-auto-refresh-hook feature is enabled. 674 // TODO: this should be restricted as it doesn't take refresh timer/refresh hold 675 // into account. 676 func AutoRefreshForGatingSnap(st *state.State, gatingSnap string) error { 677 // ensure nothing is in flight already 678 if autoRefreshInFlight(st) { 679 return fmt.Errorf("there is an auto-refresh in progress") 680 } 681 682 gating, err := refreshGating(st) 683 if err != nil { 684 return err 685 } 686 687 var hasHeld bool 688 for _, holdingSnaps := range gating { 689 if _, ok := holdingSnaps[gatingSnap]; ok { 690 hasHeld = true 691 break 692 } 693 } 694 if !hasHeld { 695 return fmt.Errorf("no snaps are held by snap %q", gatingSnap) 696 } 697 698 // NOTE: this will unlock and re-lock state for network ops 699 // XXX: should we refresh assertions (just call AutoRefresh()?) 
700 updated, tasksets, err := autoRefreshPhase1(auth.EnsureContextTODO(), st, gatingSnap) 701 if err != nil { 702 return err 703 } 704 msg := autoRefreshSummary(updated) 705 if msg == "" { 706 logger.Noticef("auto-refresh: all snaps previously held by %q are up-to-date", gatingSnap) 707 return nil 708 } 709 710 // note, we do not update last-refresh timestamp because this auto-refresh 711 // is not treated as a full auto-refresh. 712 713 chg := st.NewChange("auto-refresh", msg) 714 for _, ts := range tasksets { 715 chg.AddAll(ts) 716 } 717 chg.Set("snap-names", updated) 718 chg.Set("api-data", map[string]interface{}{"snap-names": updated}) 719 720 return nil 721 }