github.com/rigado/snapd@v2.42.5-go-mod+incompatible/overlord/state/taskrunner.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2016 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package state 21 22 import ( 23 "sync" 24 "time" 25 26 "gopkg.in/tomb.v2" 27 28 "github.com/snapcore/snapd/logger" 29 ) 30 31 // HandlerFunc is the type of function for the handlers 32 type HandlerFunc func(task *Task, tomb *tomb.Tomb) error 33 34 // Retry is returned from a handler to signal that is ok to rerun the 35 // task at a later point. It's to be used also when a task goroutine 36 // is asked to stop through its tomb. After can be used to indicate 37 // how much to postpone the retry, 0 (the default) means at the next 38 // ensure pass and is what should be used if stopped through its tomb. 39 // Reason is an optional explanation of the conflict. 40 type Retry struct { 41 After time.Duration 42 Reason string 43 } 44 45 func (r *Retry) Error() string { 46 return "task should be retried" 47 } 48 49 type blockedFunc func(t *Task, running []*Task) bool 50 51 // TaskRunner controls the running of goroutines to execute known task kinds. 52 type TaskRunner struct { 53 state *State 54 55 // locking 56 mu sync.Mutex 57 handlers map[string]handlerPair 58 optional []optionalHandler 59 cleanups map[string]HandlerFunc 60 stopped bool 61 62 blocked []blockedFunc 63 someBlocked bool 64 65 // go-routines lifecycle 66 tombs map[string]*tomb.Tomb 67 } 68 69 type handlerPair struct { 70 do, undo HandlerFunc 71 } 72 73 type optionalHandler struct { 74 match func(t *Task) bool 75 handlerPair 76 } 77 78 // NewTaskRunner creates a new TaskRunner 79 func NewTaskRunner(s *State) *TaskRunner { 80 return &TaskRunner{ 81 state: s, 82 handlers: make(map[string]handlerPair), 83 cleanups: make(map[string]HandlerFunc), 84 tombs: make(map[string]*tomb.Tomb), 85 } 86 } 87 88 // AddHandler registers the functions to concurrently call for doing and 89 // undoing tasks of the given kind. The undo handler may be nil. 90 func (r *TaskRunner) AddHandler(kind string, do, undo HandlerFunc) { 91 r.mu.Lock() 92 defer r.mu.Unlock() 93 94 r.handlers[kind] = handlerPair{do, undo} 95 } 96 97 // AddOptionalHandler register functions for doing and undoing tasks that match 98 // the given predicate if no explicit handler was registered for the task kind. 99 func (r *TaskRunner) AddOptionalHandler(match func(t *Task) bool, do, undo HandlerFunc) { 100 r.optional = append(r.optional, optionalHandler{match, handlerPair{do, undo}}) 101 } 102 103 func (r *TaskRunner) handlerPair(t *Task) handlerPair { 104 if handler, ok := r.handlers[t.Kind()]; ok { 105 return handler 106 } 107 for _, h := range r.optional { 108 if h.match(t) { 109 return h.handlerPair 110 } 111 } 112 return handlerPair{} 113 } 114 115 // KnownTaskKinds returns all tasks kinds handled by this runner. 116 func (r *TaskRunner) KnownTaskKinds() []string { 117 kinds := make([]string, 0, len(r.handlers)) 118 for h := range r.handlers { 119 kinds = append(kinds, h) 120 } 121 return kinds 122 } 123 124 // AddCleanup registers a function to be called after the change completes, 125 // for cleaning up data left behind by tasks of the specified kind. 126 // The provided function will be called no matter what the final status of the 127 // task is. This mechanism enables keeping data around for a potential undo 128 // until there's no more chance of the task being undone. 129 // 130 // The cleanup function is run concurrently with other cleanup functions, 131 // despite any wait ordering between the tasks. If it returns an error, 132 // it will be retried later. 133 // 134 // The handler for tasks of the provided kind must have been previously 135 // registered before AddCleanup is called for it. 136 func (r *TaskRunner) AddCleanup(kind string, cleanup HandlerFunc) { 137 r.mu.Lock() 138 defer r.mu.Unlock() 139 if _, ok := r.handlers[kind]; !ok { 140 panic("internal error: attempted to register cleanup for unknown task kind") 141 } 142 r.cleanups[kind] = cleanup 143 } 144 145 // SetBlocked sets a predicate function to decide whether to block a task from running based on the current running tasks. It can be used to control task serialisation. 146 func (r *TaskRunner) SetBlocked(pred func(t *Task, running []*Task) bool) { 147 r.mu.Lock() 148 defer r.mu.Unlock() 149 150 r.blocked = []blockedFunc{pred} 151 } 152 153 // AddBlocked adds a predicate function to decide whether to block a task from running based on the current running tasks. It can be used to control task serialisation. All added predicates are considered in turn until one returns true, or none. 154 func (r *TaskRunner) AddBlocked(pred func(t *Task, running []*Task) bool) { 155 r.mu.Lock() 156 defer r.mu.Unlock() 157 158 r.blocked = append(r.blocked, pred) 159 } 160 161 // run must be called with the state lock in place 162 func (r *TaskRunner) run(t *Task) { 163 var handler HandlerFunc 164 var accuRuntime func(dur time.Duration) 165 switch t.Status() { 166 case DoStatus: 167 t.SetStatus(DoingStatus) 168 fallthrough 169 case DoingStatus: 170 handler = r.handlerPair(t).do 171 accuRuntime = t.accumulateDoingTime 172 173 case UndoStatus: 174 t.SetStatus(UndoingStatus) 175 fallthrough 176 case UndoingStatus: 177 handler = r.handlerPair(t).undo 178 accuRuntime = t.accumulateUndoingTime 179 180 default: 181 panic("internal error: attempted to run task in status " + t.Status().String()) 182 } 183 if handler == nil { 184 panic("internal error: attempted to run task with nil handler for status " + t.Status().String()) 185 } 186 187 t.At(time.Time{}) // clear schedule 188 tomb := &tomb.Tomb{} 189 r.tombs[t.ID()] = tomb 190 tomb.Go(func() error { 191 // Capture the error result with tomb.Kill so we can 192 // use tomb.Err uniformily to consider both it or a 193 // overriding previous Kill reason. 194 t0 := time.Now() 195 tomb.Kill(handler(t, tomb)) 196 t1 := time.Now() 197 198 // Locks must be acquired in the same order everywhere. 199 r.mu.Lock() 200 defer r.mu.Unlock() 201 r.state.Lock() 202 defer r.state.Unlock() 203 accuRuntime(t1.Sub(t0)) 204 205 delete(r.tombs, t.ID()) 206 207 // some tasks were blocked, now there's chance the 208 // blocked predicate will change its value 209 if r.someBlocked { 210 r.state.EnsureBefore(0) 211 } 212 213 err := tomb.Err() 214 switch err.(type) { 215 case nil: 216 // we are ok 217 case *Retry: 218 // preserve 219 default: 220 if r.stopped { 221 // we are shutting down, errors might be due 222 // to cancellations, to be safe retry 223 err = &Retry{} 224 } 225 } 226 227 switch x := err.(type) { 228 case *Retry: 229 // Handler asked to be called again later. 230 // TODO Allow postponing retries past the next Ensure. 231 if t.Status() == AbortStatus { 232 // Would work without it but might take two ensures. 233 r.tryUndo(t) 234 } else if x.After != 0 { 235 t.At(timeNow().Add(x.After)) 236 } 237 case nil: 238 var next []*Task 239 switch t.Status() { 240 case DoingStatus: 241 t.SetStatus(DoneStatus) 242 fallthrough 243 case DoneStatus: 244 next = t.HaltTasks() 245 case AbortStatus: 246 // It was actually Done if it got here. 247 t.SetStatus(UndoStatus) 248 r.state.EnsureBefore(0) 249 case UndoingStatus: 250 t.SetStatus(UndoneStatus) 251 fallthrough 252 case UndoneStatus: 253 next = t.WaitTasks() 254 } 255 if len(next) > 0 { 256 r.state.EnsureBefore(0) 257 } 258 default: 259 r.abortLanes(t.Change(), t.Lanes()) 260 t.SetStatus(ErrorStatus) 261 t.Errorf("%s", err) 262 } 263 264 return nil 265 }) 266 } 267 268 func (r *TaskRunner) clean(t *Task) { 269 if !t.Change().IsReady() { 270 // Whole Change is not ready so don't run cleanups yet. 271 return 272 } 273 274 cleanup, ok := r.cleanups[t.Kind()] 275 if !ok { 276 t.SetClean() 277 return 278 } 279 280 tomb := &tomb.Tomb{} 281 r.tombs[t.ID()] = tomb 282 tomb.Go(func() error { 283 tomb.Kill(cleanup(t, tomb)) 284 285 // Locks must be acquired in the same order everywhere. 286 r.mu.Lock() 287 defer r.mu.Unlock() 288 r.state.Lock() 289 defer r.state.Unlock() 290 291 delete(r.tombs, t.ID()) 292 293 if tomb.Err() != nil { 294 logger.Debugf("Cleaning task %s: %s", t.ID(), tomb.Err()) 295 } else { 296 t.SetClean() 297 } 298 return nil 299 }) 300 } 301 302 func (r *TaskRunner) abortLanes(chg *Change, lanes []int) { 303 chg.AbortLanes(lanes) 304 ensureScheduled := false 305 for _, t := range chg.Tasks() { 306 status := t.Status() 307 if status == AbortStatus { 308 if tb, ok := r.tombs[t.ID()]; ok { 309 tb.Kill(nil) 310 } 311 } 312 if !ensureScheduled && !status.Ready() { 313 ensureScheduled = true 314 r.state.EnsureBefore(0) 315 } 316 } 317 } 318 319 // tryUndo replaces the status of a knowingly aborted task. 320 func (r *TaskRunner) tryUndo(t *Task) { 321 if t.Status() == AbortStatus && r.handlerPair(t).undo == nil { 322 // Cannot undo but it was stopped in flight. 323 // Hold so it doesn't look like it finished. 324 t.SetStatus(HoldStatus) 325 if len(t.WaitTasks()) > 0 { 326 r.state.EnsureBefore(0) 327 } 328 } else { 329 t.SetStatus(UndoStatus) 330 r.state.EnsureBefore(0) 331 } 332 } 333 334 // Ensure starts new goroutines for all known tasks with no pending 335 // dependencies. 336 // Note that Ensure will lock the state. 337 func (r *TaskRunner) Ensure() error { 338 r.mu.Lock() 339 defer r.mu.Unlock() 340 341 if r.stopped { 342 // we are stopping, don't run another ensure 343 return nil 344 } 345 346 // Locks must be acquired in the same order everywhere. 347 r.state.Lock() 348 defer r.state.Unlock() 349 350 r.someBlocked = false 351 running := make([]*Task, 0, len(r.tombs)) 352 for tid := range r.tombs { 353 t := r.state.Task(tid) 354 if t != nil { 355 running = append(running, t) 356 } 357 } 358 359 ensureTime := timeNow() 360 nextTaskTime := time.Time{} 361 ConsiderTasks: 362 for _, t := range r.state.Tasks() { 363 handlers := r.handlerPair(t) 364 if handlers.do == nil { 365 // Handled by a different runner instance. 366 continue 367 } 368 369 tb := r.tombs[t.ID()] 370 371 if t.Status() == AbortStatus { 372 if tb != nil { 373 tb.Kill(nil) 374 continue 375 } 376 r.tryUndo(t) 377 } 378 379 if tb != nil { 380 // Already being handled. 381 continue 382 } 383 384 status := t.Status() 385 if status.Ready() { 386 if !t.IsClean() { 387 r.clean(t) 388 } 389 continue 390 } 391 392 if mustWait(t) { 393 // Dependencies still unhandled. 394 continue 395 } 396 397 if status == UndoStatus && handlers.undo == nil { 398 // Although this has no dependencies itself, it must have waited 399 // above too since follow up tasks may have handlers again. 400 // Cannot undo. Revert to done status. 401 t.SetStatus(DoneStatus) 402 if len(t.WaitTasks()) > 0 { 403 r.state.EnsureBefore(0) 404 } 405 continue 406 } 407 408 // skip tasks scheduled for later and also track the earliest one 409 tWhen := t.AtTime() 410 if !tWhen.IsZero() && ensureTime.Before(tWhen) { 411 if nextTaskTime.IsZero() || nextTaskTime.After(tWhen) { 412 nextTaskTime = tWhen 413 } 414 continue 415 } 416 417 // check if any of the blocked predicates returns true 418 // and skip the task if so 419 for _, blocked := range r.blocked { 420 if blocked(t, running) { 421 r.someBlocked = true 422 continue ConsiderTasks 423 } 424 } 425 426 logger.Debugf("Running task %s on %s: %s", t.ID(), t.Status(), t.Summary()) 427 r.run(t) 428 429 running = append(running, t) 430 } 431 432 // schedule next Ensure no later than the next task time 433 if !nextTaskTime.IsZero() { 434 r.state.EnsureBefore(nextTaskTime.Sub(ensureTime)) 435 } 436 437 return nil 438 } 439 440 // mustWait returns whether task t must wait for other tasks to be done. 441 func mustWait(t *Task) bool { 442 switch t.Status() { 443 case DoStatus: 444 for _, wt := range t.WaitTasks() { 445 if wt.Status() != DoneStatus { 446 return true 447 } 448 } 449 case UndoStatus: 450 for _, ht := range t.HaltTasks() { 451 if !ht.Status().Ready() { 452 return true 453 } 454 } 455 } 456 return false 457 } 458 459 // wait expects to be called with th r.mu lock held 460 func (r *TaskRunner) wait() { 461 for len(r.tombs) > 0 { 462 for _, t := range r.tombs { 463 r.mu.Unlock() 464 t.Wait() 465 r.mu.Lock() 466 break 467 } 468 } 469 } 470 471 // Stop kills all concurrent activities and returns after that's done. 472 func (r *TaskRunner) Stop() { 473 r.mu.Lock() 474 defer r.mu.Unlock() 475 476 r.stopped = true 477 478 for _, tb := range r.tombs { 479 tb.Kill(nil) 480 } 481 482 r.wait() 483 } 484 485 // Wait waits for all concurrent activities and returns after that's done. 486 func (r *TaskRunner) Wait() { 487 r.mu.Lock() 488 defer r.mu.Unlock() 489 490 r.wait() 491 } 492 493 // StopKinds kills all concurrent tasks of the given kinds and returns 494 // after that's done. 495 func (r *TaskRunner) StopKinds(kind ...string) { 496 r.mu.Lock() 497 defer r.mu.Unlock() 498 499 kinds := make(map[string]bool, len(kind)) 500 for _, k := range kind { 501 kinds[k] = true 502 } 503 504 var tombs []*tomb.Tomb 505 // Locks must be acquired in the same order everywhere: 506 // r.mu, r.state 507 r.state.Lock() 508 for tid, tb := range r.tombs { 509 task := r.state.Task(tid) 510 if task == nil || !kinds[task.Kind()] { 511 continue 512 } 513 tombs = append(tombs, tb) 514 tb.Kill(nil) 515 } 516 r.state.Unlock() 517 518 for _, tb := range tombs { 519 r.mu.Unlock() 520 tb.Wait() 521 r.mu.Lock() 522 } 523 }