github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/monitor/tasks.go (about) 1 package monitor 2 3 import ( 4 "time" 5 6 "github.com/evergreen-ci/evergreen" 7 "github.com/evergreen-ci/evergreen/apimodels" 8 "github.com/evergreen-ci/evergreen/model" 9 "github.com/evergreen-ci/evergreen/model/host" 10 "github.com/evergreen-ci/evergreen/model/task" 11 "github.com/mongodb/grip" 12 "github.com/pkg/errors" 13 ) 14 15 // responsible for cleaning up any tasks that need to be stopped 16 type TaskMonitor struct { 17 // will be used for flagging tasks that need to be cleaned up 18 flaggingFuncs []taskFlaggingFunc 19 } 20 21 // run through the list of task flagging functions, finding all tasks that 22 // need to be cleaned up and taking appropriate action. takes in a map 23 // of project name -> project info 24 func (tm *TaskMonitor) CleanupTasks(projects map[string]model.Project) []error { 25 grip.Info("Cleaning up tasks...") 26 27 // used to store any errors that occur 28 var errs []error 29 30 for _, f := range tm.flaggingFuncs { 31 // find the next batch of tasks to be cleaned up 32 tasksToCleanUp, err := f() 33 34 // continue on error so that one wonky flagging function doesn't 35 // stop others from working 36 if err != nil { 37 errs = append(errs, errors.Wrap(err, "error finding tasks to be cleaned up")) 38 continue 39 } 40 41 // clean up all of the tasks. continue on error to allow further cleanup 42 // to progress 43 if errs = cleanUpTasks(tasksToCleanUp, projects); errs != nil { 44 for _, err := range errs { 45 errs = append(errs, errors.Wrap(err, "error cleaning up tasks")) 46 } 47 } 48 } 49 50 grip.Info("Done cleaning up tasks") 51 52 return errs 53 } 54 55 // clean up the passed-in slice of tasks 56 func cleanUpTasks(taskWrappers []doomedTaskWrapper, projects map[string]model.Project) []error { 57 grip.Infof("Cleaning up %d tasks...", len(taskWrappers)) 58 59 // used to store any errors that occur 60 var errs []error 61 62 for _, wrapper := range taskWrappers { 63 grip.Infof("Cleaning up task %s, for reason '%s'", wrapper.task.Id, wrapper.reason) 64 65 // clean up the task. continue on error to let others be cleaned up 66 if err := cleanUpTask(wrapper, projects); err != nil { 67 errs = append(errs, errors.Wrapf(err, 68 "error cleaning up task %v", wrapper.task.Id)) 69 continue 70 } 71 grip.Infoln("Successfully cleaned up task", wrapper.task.Id) 72 } 73 74 return errs 75 } 76 77 // function to clean up a single task 78 func cleanUpTask(wrapper doomedTaskWrapper, projects map[string]model.Project) error { 79 80 // find the appropriate project for the task 81 project, ok := projects[wrapper.task.Project] 82 if !ok { 83 return errors.Errorf("could not find project %v for task %v", 84 wrapper.task.Project, wrapper.task.Id) 85 } 86 87 // get the host for the task 88 host, err := host.FindOne(host.ById(wrapper.task.HostId)) 89 if err != nil { 90 return errors.Wrapf(err, "error finding host %s for task %s", 91 wrapper.task.HostId, wrapper.task.Id) 92 } 93 94 // if there's no relevant host, something went wrong 95 if host == nil { 96 grip.Errorln("no entry found for host:", wrapper.task.HostId) 97 return errors.WithStack(wrapper.task.MarkUnscheduled()) 98 } 99 100 // if the host still has the task as its running task, clear it. 101 if host.RunningTask == wrapper.task.Id { 102 // clear out the host's running task 103 if err = host.ClearRunningTask(wrapper.task.Id, time.Now()); err != nil { 104 return errors.Wrapf(err, "error clearing running task %v from host %v: %v", 105 wrapper.task.Id, host.Id) 106 } 107 } 108 109 // take different action, depending on the type of task death 110 switch wrapper.reason { 111 case HeartbeatTimeout: 112 err = cleanUpTimedOutHeartbeat(wrapper.task, project) 113 default: 114 return errors.Errorf("unknown reason for cleaning up task: %v", wrapper.reason) 115 } 116 117 if err != nil { 118 return errors.Wrapf(err, "error cleaning up task %s", wrapper.task.Id) 119 } 120 121 return nil 122 123 } 124 125 // clean up a task whose heartbeat has timed out 126 func cleanUpTimedOutHeartbeat(t task.Task, project model.Project) error { 127 // mock up the failure details of the task 128 detail := &apimodels.TaskEndDetail{ 129 Description: task.AgentHeartbeat, 130 TimedOut: true, 131 Status: evergreen.TaskFailed, 132 } 133 134 // try to reset the task 135 if err := model.TryResetTask(t.Id, "", RunnerName, &project, detail); err != nil { 136 return errors.Wrapf(err, "error trying to reset task %s", t.Id) 137 } 138 // success 139 return nil 140 }