github.com/ubuntu-core/snappy@v0.0.0-20210827154228-9e584df982bb/overlord/snapstate/autorefresh_gating.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2021 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package snapstate 21 22 import ( 23 "encoding/json" 24 "fmt" 25 "os" 26 "sort" 27 "strings" 28 "time" 29 30 "github.com/snapcore/snapd/interfaces" 31 "github.com/snapcore/snapd/logger" 32 "github.com/snapcore/snapd/overlord/auth" 33 "github.com/snapcore/snapd/overlord/ifacestate/ifacerepo" 34 "github.com/snapcore/snapd/overlord/state" 35 "github.com/snapcore/snapd/release" 36 "github.com/snapcore/snapd/snap" 37 ) 38 39 var gateAutoRefreshHookName = "gate-auto-refresh" 40 41 // GateAutoRefreshAction represents the action executed by 42 // snapctl refresh --hold or --proceed and stored in the context of 43 // gate-auto-refresh hook. 
44 type GateAutoRefreshAction int 45 46 const ( 47 GateAutoRefreshProceed GateAutoRefreshAction = iota 48 GateAutoRefreshHold 49 ) 50 51 // cumulative hold time for snaps other than self 52 const maxOtherHoldDuration = time.Hour * 48 53 54 var timeNow = func() time.Time { 55 return time.Now() 56 } 57 58 func lastRefreshed(st *state.State, snapName string) (time.Time, error) { 59 var snapst SnapState 60 if err := Get(st, snapName, &snapst); err != nil { 61 return time.Time{}, fmt.Errorf("internal error, cannot get snap %q: %v", snapName, err) 62 } 63 // try to get last refresh time from snapstate, but it may not be present 64 // for snaps installed before the introduction of last-refresh attribute. 65 if snapst.LastRefreshTime != nil { 66 return *snapst.LastRefreshTime, nil 67 } 68 snapInfo, err := snapst.CurrentInfo() 69 if err != nil { 70 return time.Time{}, err 71 } 72 // fall back to the modification time of .snap blob file as it's the best 73 // approximation of last refresh time. 74 fst, err := os.Stat(snapInfo.MountFile()) 75 if err != nil { 76 return time.Time{}, err 77 } 78 return fst.ModTime(), nil 79 } 80 81 type holdState struct { 82 // FirstHeld keeps the time when the given snap was first held for refresh by a gating snap. 83 FirstHeld time.Time `json:"first-held"` 84 // HoldUntil stores the desired end time for holding. 
85 HoldUntil time.Time `json:"hold-until"` 86 } 87 88 func refreshGating(st *state.State) (map[string]map[string]*holdState, error) { 89 // held snaps -> holding snap(s) -> first-held/hold-until time 90 var gating map[string]map[string]*holdState 91 err := st.Get("snaps-hold", &gating) 92 if err != nil && err != state.ErrNoState { 93 return nil, fmt.Errorf("internal error: cannot get snaps-hold: %v", err) 94 } 95 if err == state.ErrNoState { 96 return make(map[string]map[string]*holdState), nil 97 } 98 return gating, nil 99 } 100 101 // HoldDurationError contains the that error prevents requested hold, along with 102 // hold time that's left (if any). 103 type HoldDurationError struct { 104 Err error 105 DurationLeft time.Duration 106 } 107 108 func (h *HoldDurationError) Error() string { 109 return h.Err.Error() 110 } 111 112 // HoldError contains the details of snaps that cannot to be held. 113 type HoldError struct { 114 SnapsInError map[string]HoldDurationError 115 } 116 117 func (h *HoldError) Error() string { 118 l := []string{""} 119 for _, e := range h.SnapsInError { 120 l = append(l, e.Error()) 121 } 122 return fmt.Sprintf("cannot hold some snaps:%s", strings.Join(l, "\n - ")) 123 } 124 125 func maxAllowedPostponement(gatingSnap, affectedSnap string, maxPostponement time.Duration) time.Duration { 126 if affectedSnap == gatingSnap { 127 return maxPostponement 128 } 129 return maxOtherHoldDuration 130 } 131 132 // holdDurationLeft computes the maximum duration that's left for holding a refresh 133 // given current time, last refresh time, time when snap was first held, maximum 134 // duration allowed for the given snap and maximum overall postponement allowed by 135 // snapd. 
136 func holdDurationLeft(now time.Time, lastRefresh, firstHeld time.Time, maxDuration, maxPostponement time.Duration) time.Duration { 137 d1 := firstHeld.Add(maxDuration).Sub(now) 138 d2 := lastRefresh.Add(maxPostponement).Sub(now) 139 if d1 < d2 { 140 return d1 141 } 142 return d2 143 } 144 145 // HoldRefresh marks affectingSnaps as held for refresh for up to holdTime. 146 // HoldTime of zero denotes maximum allowed hold time. 147 // Holding may fail for only some snaps in which case HoldError is returned and 148 // it contains the details of failed ones. 149 func HoldRefresh(st *state.State, gatingSnap string, holdDuration time.Duration, affectingSnaps ...string) error { 150 gating, err := refreshGating(st) 151 if err != nil { 152 return err 153 } 154 herr := &HoldError{ 155 SnapsInError: make(map[string]HoldDurationError), 156 } 157 now := timeNow() 158 for _, heldSnap := range affectingSnaps { 159 hold, ok := gating[heldSnap][gatingSnap] 160 if !ok { 161 hold = &holdState{ 162 FirstHeld: now, 163 } 164 } 165 166 lastRefreshTime, err := lastRefreshed(st, heldSnap) 167 if err != nil { 168 return err 169 } 170 171 mp := maxPostponement - maxPostponementBuffer 172 maxDur := maxAllowedPostponement(gatingSnap, heldSnap, mp) 173 174 // calculate max hold duration that's left considering previous hold 175 // requests of this snap and last refresh time. 176 left := holdDurationLeft(now, lastRefreshTime, hold.FirstHeld, maxDur, mp) 177 if left <= 0 { 178 herr.SnapsInError[heldSnap] = HoldDurationError{ 179 Err: fmt.Errorf("snap %q cannot hold snap %q anymore, maximum refresh postponement exceeded", gatingSnap, heldSnap), 180 } 181 continue 182 } 183 184 dur := holdDuration 185 if dur == 0 { 186 // duration not specified, using a default one (maximum) or what's 187 // left of it. 
188 dur = left 189 } else { 190 // explicit hold duration requested 191 if dur > maxDur { 192 herr.SnapsInError[heldSnap] = HoldDurationError{ 193 Err: fmt.Errorf("requested holding duration for snap %q of %s by snap %q exceeds maximum holding time", heldSnap, holdDuration, gatingSnap), 194 DurationLeft: left, 195 } 196 continue 197 } 198 } 199 200 newHold := now.Add(dur) 201 cutOff := lastRefreshTime.Add(maxPostponement - maxPostponementBuffer) 202 203 // consider last refresh time and adjust hold duration if needed so it's 204 // not exceeded. 205 if newHold.Before(cutOff) { 206 hold.HoldUntil = newHold 207 } else { 208 hold.HoldUntil = cutOff 209 } 210 211 // finally store/update gating hold data 212 if _, ok := gating[heldSnap]; !ok { 213 gating[heldSnap] = make(map[string]*holdState) 214 } 215 gating[heldSnap][gatingSnap] = hold 216 } 217 218 if len(herr.SnapsInError) > 0 { 219 // if some of the affecting snaps couldn't be held anymore then it 220 // doesn't make sense to hold other affecting snaps (because the gating 221 // snap is going to be disrupted anyway); go over all affectingSnaps 222 // again and remove gating info for them - this also deletes old holdings 223 // (if the hook run on previous refresh attempt) therefore we need to 224 // update snaps-hold state below. 225 for _, heldSnap := range affectingSnaps { 226 delete(gating[heldSnap], gatingSnap) 227 } 228 } 229 st.Set("snaps-hold", gating) 230 if len(herr.SnapsInError) > 0 { 231 return herr 232 } 233 return nil 234 } 235 236 // ProceedWithRefresh unblocks all snaps held by gatingSnap for refresh. This 237 // should be called for --proceed on the gatingSnap. 
238 func ProceedWithRefresh(st *state.State, gatingSnap string) error { 239 gating, err := refreshGating(st) 240 if err != nil { 241 return err 242 } 243 if len(gating) == 0 { 244 return nil 245 } 246 247 var changed bool 248 for heldSnap, gatingSnaps := range gating { 249 if _, ok := gatingSnaps[gatingSnap]; ok { 250 delete(gatingSnaps, gatingSnap) 251 changed = true 252 } 253 if len(gatingSnaps) == 0 { 254 delete(gating, heldSnap) 255 } 256 } 257 258 if changed { 259 st.Set("snaps-hold", gating) 260 } 261 return nil 262 } 263 264 // pruneGating removes affecting snaps that are not in candidates (meaning 265 // there is no update for them anymore). 266 func pruneGating(st *state.State, candidates map[string]*refreshCandidate) error { 267 gating, err := refreshGating(st) 268 if err != nil { 269 return err 270 } 271 272 if len(gating) == 0 { 273 return nil 274 } 275 276 var changed bool 277 for affectingSnap := range gating { 278 if candidates[affectingSnap] == nil { 279 // the snap doesn't have an update anymore, forget it 280 delete(gating, affectingSnap) 281 changed = true 282 } 283 } 284 if changed { 285 st.Set("snaps-hold", gating) 286 } 287 return nil 288 } 289 290 // resetGatingForRefreshed resets gating information by removing refreshedSnaps 291 // (they are not held anymore). This should be called for snaps about to be 292 // refreshed. 293 func resetGatingForRefreshed(st *state.State, refreshedSnaps ...string) error { 294 gating, err := refreshGating(st) 295 if err != nil { 296 return err 297 } 298 if len(gating) == 0 { 299 return nil 300 } 301 302 var changed bool 303 for _, snapName := range refreshedSnaps { 304 if _, ok := gating[snapName]; ok { 305 delete(gating, snapName) 306 changed = true 307 } 308 } 309 310 if changed { 311 st.Set("snaps-hold", gating) 312 } 313 return nil 314 } 315 316 // pruneSnapsHold removes the given snap from snaps-hold, whether it was an 317 // affecting snap or gating snap. 
This should be called when a snap gets 318 // removed. 319 func pruneSnapsHold(st *state.State, snapName string) error { 320 gating, err := refreshGating(st) 321 if err != nil { 322 return err 323 } 324 if len(gating) == 0 { 325 return nil 326 } 327 328 var changed bool 329 330 if _, ok := gating[snapName]; ok { 331 delete(gating, snapName) 332 changed = true 333 } 334 335 for heldSnap, holdingSnaps := range gating { 336 if _, ok := holdingSnaps[snapName]; ok { 337 delete(holdingSnaps, snapName) 338 if len(holdingSnaps) == 0 { 339 delete(gating, heldSnap) 340 } 341 changed = true 342 } 343 } 344 345 if changed { 346 st.Set("snaps-hold", gating) 347 } 348 349 return nil 350 } 351 352 // heldSnaps returns all snaps that are gated and shouldn't be refreshed. 353 func heldSnaps(st *state.State) (map[string]bool, error) { 354 gating, err := refreshGating(st) 355 if err != nil { 356 return nil, err 357 } 358 if len(gating) == 0 { 359 return nil, nil 360 } 361 362 now := timeNow() 363 364 held := make(map[string]bool) 365 Loop: 366 for heldSnap, holdingSnaps := range gating { 367 refreshed, err := lastRefreshed(st, heldSnap) 368 if err != nil { 369 return nil, err 370 } 371 // make sure we don't hold any snap for more than maxPostponement 372 if refreshed.Add(maxPostponement).Before(now) { 373 continue 374 } 375 for _, hold := range holdingSnaps { 376 if hold.HoldUntil.Before(now) { 377 continue 378 } 379 held[heldSnap] = true 380 continue Loop 381 } 382 } 383 return held, nil 384 } 385 386 type AffectedSnapInfo struct { 387 Restart bool 388 Base bool 389 AffectingSnaps map[string]bool 390 } 391 392 // AffectedByRefreshCandidates returns information about all snaps affected by 393 // current refresh-candidates in the state. 
394 func AffectedByRefreshCandidates(st *state.State) (map[string]*AffectedSnapInfo, error) { 395 // we care only about the keys so this can use 396 // *json.RawMessage instead of refreshCandidates 397 var candidates map[string]*json.RawMessage 398 if err := st.Get("refresh-candidates", &candidates); err != nil && err != state.ErrNoState { 399 return nil, err 400 } 401 402 snaps := make([]string, 0, len(candidates)) 403 for cand := range candidates { 404 snaps = append(snaps, cand) 405 } 406 affected, err := affectedByRefresh(st, snaps) 407 return affected, err 408 } 409 410 // AffectingSnapsForAffectedByRefreshCandidates returns the list of all snaps 411 // affecting affectedSnap (i.e. a gating snap), based on upcoming updates 412 // from refresh-candidates. 413 func AffectingSnapsForAffectedByRefreshCandidates(st *state.State, affectedSnap string) ([]string, error) { 414 affected, err := AffectedByRefreshCandidates(st) 415 if err != nil { 416 return nil, err 417 } 418 affectedInfo := affected[affectedSnap] 419 if affectedInfo == nil || len(affectedInfo.AffectingSnaps) == 0 { 420 return nil, nil 421 } 422 affecting := make([]string, 0, len(affectedInfo.AffectingSnaps)) 423 for sn := range affectedInfo.AffectingSnaps { 424 affecting = append(affecting, sn) 425 } 426 sort.Strings(affecting) 427 return affecting, nil 428 } 429 430 func affectedByRefresh(st *state.State, updates []string) (map[string]*AffectedSnapInfo, error) { 431 allSnaps, err := All(st) 432 if err != nil { 433 return nil, err 434 } 435 snapsWithHook := make(map[string]*SnapState) 436 437 var bootBase string 438 if !release.OnClassic { 439 deviceCtx, err := DeviceCtx(st, nil, nil) 440 if err != nil { 441 return nil, fmt.Errorf("cannot get device context: %v", err) 442 } 443 bootBaseInfo, err := BootBaseInfo(st, deviceCtx) 444 if err != nil { 445 return nil, fmt.Errorf("cannot get boot base info: %v", err) 446 } 447 bootBase = bootBaseInfo.InstanceName() 448 } 449 450 byBase := 
make(map[string][]string) 451 for name, snapSt := range allSnaps { 452 if !snapSt.Active { 453 delete(allSnaps, name) 454 continue 455 } 456 inf, err := snapSt.CurrentInfo() 457 if err != nil { 458 return nil, err 459 } 460 // optimization: do not consider snaps that don't have gate-auto-refresh hook. 461 if inf.Hooks[gateAutoRefreshHookName] == nil { 462 continue 463 } 464 snapsWithHook[name] = snapSt 465 466 base := inf.Base 467 if base == "none" { 468 continue 469 } 470 if inf.Base == "" { 471 base = "core" 472 } 473 byBase[base] = append(byBase[base], snapSt.InstanceName()) 474 } 475 476 affected := make(map[string]*AffectedSnapInfo) 477 478 addAffected := func(snapName, affectedBy string, restart bool, base bool) { 479 if affected[snapName] == nil { 480 affected[snapName] = &AffectedSnapInfo{ 481 AffectingSnaps: map[string]bool{}, 482 } 483 } 484 affectedInfo := affected[snapName] 485 if restart { 486 affectedInfo.Restart = restart 487 } 488 if base { 489 affectedInfo.Base = base 490 } 491 affectedInfo.AffectingSnaps[affectedBy] = true 492 } 493 494 for _, snapName := range updates { 495 snapSt := allSnaps[snapName] 496 if snapSt == nil { 497 // this could happen if an update for inactive snap was requested (those 498 // are filtered out above). 
499 return nil, fmt.Errorf("internal error: no state for snap %q", snapName) 500 } 501 up, err := snapSt.CurrentInfo() 502 if err != nil { 503 return nil, err 504 } 505 506 // the snap affects itself (as long as it has the hook) 507 if snapSt := snapsWithHook[up.InstanceName()]; snapSt != nil { 508 addAffected(up.InstanceName(), up.InstanceName(), false, false) 509 } 510 511 // on core system, affected by update of boot base 512 if bootBase != "" && up.InstanceName() == bootBase { 513 for _, snapSt := range snapsWithHook { 514 addAffected(snapSt.InstanceName(), up.InstanceName(), true, false) 515 } 516 } 517 518 // snaps that can trigger reboot 519 // XXX: gadget refresh doesn't always require reboot, refine this 520 if up.Type() == snap.TypeKernel || up.Type() == snap.TypeGadget { 521 for _, snapSt := range snapsWithHook { 522 addAffected(snapSt.InstanceName(), up.InstanceName(), true, false) 523 } 524 continue 525 } 526 if up.Type() == snap.TypeBase || up.SnapName() == "core" { 527 // affected by refresh of this base snap 528 for _, snapName := range byBase[up.InstanceName()] { 529 addAffected(snapName, up.InstanceName(), false, true) 530 } 531 } 532 533 repo := ifacerepo.Get(st) 534 535 // consider slots provided by refreshed snap, but exclude core and snapd 536 // since they provide system-level slots that are generally not disrupted 537 // by snap updates. 
538 if up.SnapType != snap.TypeSnapd && up.SnapName() != "core" { 539 for _, slotInfo := range up.Slots { 540 conns, err := repo.Connected(up.InstanceName(), slotInfo.Name) 541 if err != nil { 542 return nil, err 543 } 544 for _, cref := range conns { 545 // affected only if it wasn't optimized out above 546 if snapsWithHook[cref.PlugRef.Snap] != nil { 547 addAffected(cref.PlugRef.Snap, up.InstanceName(), true, false) 548 } 549 } 550 } 551 } 552 553 // consider plugs/slots with AffectsPlugOnRefresh flag; 554 // for slot side only consider snapd/core because they are ignored by the 555 // earlier loop around slots. 556 if up.SnapType == snap.TypeSnapd || up.SnapType == snap.TypeOS { 557 for _, slotInfo := range up.Slots { 558 iface := repo.Interface(slotInfo.Interface) 559 if iface == nil { 560 return nil, fmt.Errorf("internal error: unknown interface %s", slotInfo.Interface) 561 } 562 si := interfaces.StaticInfoOf(iface) 563 if !si.AffectsPlugOnRefresh { 564 continue 565 } 566 conns, err := repo.Connected(up.InstanceName(), slotInfo.Name) 567 if err != nil { 568 return nil, err 569 } 570 for _, cref := range conns { 571 if snapsWithHook[cref.PlugRef.Snap] != nil { 572 addAffected(cref.PlugRef.Snap, up.InstanceName(), true, false) 573 } 574 } 575 } 576 } 577 } 578 579 return affected, nil 580 } 581 582 // createGateAutoRefreshHooks creates gate-auto-refresh hooks for all affectedSnaps. 583 // Hook tasks will be chained to run sequentially. 
584 func createGateAutoRefreshHooks(st *state.State, affectedSnaps []string) *state.TaskSet { 585 ts := state.NewTaskSet() 586 var prev *state.Task 587 for _, snapName := range affectedSnaps { 588 hookTask := SetupGateAutoRefreshHook(st, snapName) 589 // XXX: it should be fine to run the hooks in parallel 590 if prev != nil { 591 hookTask.WaitFor(prev) 592 } 593 ts.AddTask(hookTask) 594 prev = hookTask 595 } 596 return ts 597 } 598 599 func conditionalAutoRefreshAffectedSnaps(t *state.Task) ([]string, error) { 600 var snaps map[string]*json.RawMessage 601 if err := t.Get("snaps", &snaps); err != nil { 602 return nil, fmt.Errorf("internal error: cannot get snaps to update for %s task %s", t.Kind(), t.ID()) 603 } 604 names := make([]string, 0, len(snaps)) 605 for sn := range snaps { 606 // TODO: drop snaps once we know the outcome of gate-auto-refresh hooks. 607 names = append(names, sn) 608 } 609 return names, nil 610 } 611 612 // snapsToRefresh returns all snaps that should proceed with refresh considering 613 // hold behavior. 614 var snapsToRefresh = func(gatingTask *state.Task) ([]*refreshCandidate, error) { 615 var snaps map[string]*refreshCandidate 616 if err := gatingTask.Get("snaps", &snaps); err != nil { 617 return nil, err 618 } 619 620 held, err := heldSnaps(gatingTask.State()) 621 if err != nil { 622 return nil, err 623 } 624 625 var skipped []string 626 var candidates []*refreshCandidate 627 for _, s := range snaps { 628 if !held[s.InstanceName()] { 629 candidates = append(candidates, s) 630 } else { 631 skipped = append(skipped, s.InstanceName()) 632 } 633 } 634 635 if len(skipped) > 0 { 636 sort.Strings(skipped) 637 logger.Noticef("skipping refresh of held snaps: %s", strings.Join(skipped, ",")) 638 } 639 640 return candidates, nil 641 } 642 643 // AutoRefreshForGatingSnap triggers an auto-refresh change for all 644 // snaps held by the given gating snap. This should only be called if the 645 // gate-auto-refresh-hook feature is enabled. 
646 // TODO: this should be restricted as it doesn't take refresh timer/refresh hold 647 // into account. 648 func AutoRefreshForGatingSnap(st *state.State, gatingSnap string) error { 649 // ensure nothing is in flight already 650 if autoRefreshInFlight(st) { 651 return fmt.Errorf("there is an auto-refresh in progress") 652 } 653 654 gating, err := refreshGating(st) 655 if err != nil { 656 return err 657 } 658 659 var hasHeld bool 660 for _, holdingSnaps := range gating { 661 if _, ok := holdingSnaps[gatingSnap]; ok { 662 hasHeld = true 663 break 664 } 665 } 666 if !hasHeld { 667 return fmt.Errorf("no snaps are held by snap %q", gatingSnap) 668 } 669 670 // NOTE: this will unlock and re-lock state for network ops 671 // XXX: should we refresh assertions (just call AutoRefresh()?) 672 updated, tasksets, err := autoRefreshPhase1(auth.EnsureContextTODO(), st, gatingSnap) 673 if err != nil { 674 return err 675 } 676 msg := autoRefreshSummary(updated) 677 if msg == "" { 678 logger.Noticef("auto-refresh: all snaps previously held by %q are up-to-date", gatingSnap) 679 return nil 680 } 681 682 // note, we do not update last-refresh timestamp because this auto-refresh 683 // is not treated as a full auto-refresh. 684 685 chg := st.NewChange("auto-refresh", msg) 686 for _, ts := range tasksets { 687 chg.AddAll(ts) 688 } 689 chg.Set("snap-names", updated) 690 chg.Set("api-data", map[string]interface{}{"snap-names": updated}) 691 692 return nil 693 }