// Source: github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/service/api_task.go

package service

import (
	"fmt"
	"net/http"
	"time"

	"github.com/evergreen-ci/evergreen"
	"github.com/evergreen-ci/evergreen/alerts"
	"github.com/evergreen-ci/evergreen/apimodels"
	"github.com/evergreen-ci/evergreen/bookkeeping"
	"github.com/evergreen-ci/evergreen/cloud"
	"github.com/evergreen-ci/evergreen/cloud/providers"
	"github.com/evergreen-ci/evergreen/model"
	"github.com/evergreen-ci/evergreen/model/host"
	"github.com/evergreen-ci/evergreen/model/task"
	"github.com/evergreen-ci/evergreen/taskrunner"
	"github.com/evergreen-ci/evergreen/util"
	"github.com/mongodb/grip"
	"github.com/pkg/errors"
)

// StartTask is the handler function that retrieves the task from the request,
// decodes the task start request from the request body, and marks the task as
// started via model.MarkStart. It then looks up the host whose running task is
// this task and replies with a confirmation naming that host; if no such host
// is found the request fails with an internal server error.
// NOTE(review): this comment previously stated that the handler acquires the
// global lock, marks associated builds and versions as started, and updates
// the host document with the pid of the agent. None of that is visible in the
// handler body itself - confirm whether model.MarkStart is responsible for it.
28 func (as *APIServer) StartTask(w http.ResponseWriter, r *http.Request) { 29 t := MustHaveTask(r) 30 31 grip.Infoln("Marking task started:", t.Id) 32 33 taskStartInfo := &apimodels.TaskStartRequest{} 34 if err := util.ReadJSONInto(util.NewRequestReader(r), taskStartInfo); err != nil { 35 http.Error(w, fmt.Sprintf("Error reading task start request for %v: %v", t.Id, err), http.StatusBadRequest) 36 return 37 } 38 39 if err := model.MarkStart(t.Id); err != nil { 40 message := errors.Wrapf(err, "Error marking task '%s' started", t.Id) 41 as.LoggedError(w, r, http.StatusInternalServerError, message) 42 return 43 } 44 45 h, err := host.FindOne(host.ByRunningTaskId(t.Id)) 46 if err != nil { 47 message := errors.Wrapf(err, "Error finding host running task %s", t.Id) 48 as.LoggedError(w, r, http.StatusInternalServerError, message) 49 return 50 } 51 52 if h == nil { 53 message := errors.Errorf("No host found running task %v", t.Id) 54 if t.HostId != "" { 55 message = errors.Errorf("No host found running task %s but task is said to be running on %s", 56 t.Id, t.HostId) 57 } 58 59 as.LoggedError(w, r, http.StatusInternalServerError, message) 60 return 61 } 62 63 as.WriteJSON(w, http.StatusOK, fmt.Sprintf("Task %v started on host %v", t.Id, h.Id)) 64 } 65 66 // validateTaskEndDetails returns true if the task is finished or undispatched 67 func validateTaskEndDetails(details *apimodels.TaskEndDetail) bool { 68 return details.Status == evergreen.TaskSucceeded || 69 details.Status == evergreen.TaskFailed || 70 details.Status == evergreen.TaskUndispatched 71 } 72 73 // checkHostHealth checks that host is running and creates a task response that is sent back to the agent after the task ends. 
74 func checkHostHealth(h *host.Host, agentRevision string) (bool, string) { 75 if h.Status != evergreen.HostRunning { 76 return true, fmt.Sprintf("host %s is in state %s and agent should exit", 77 h.Id, h.Status) 78 } 79 if h.AgentRevision != agentRevision { 80 return true, fmt.Sprintf("agent should be rebuilt:"+ 81 "host has agent revision %s and latest revision is %s", 82 h.AgentRevision, agentRevision) 83 } 84 return false, "" 85 86 } 87 88 // EndTask creates test results from the request and the project config. 89 // It then acquires the lock, and with it, marks tasks as finished or inactive if aborted. 90 // If the task is a patch, it will alert the users based on failures 91 // It also updates the expected task duration of the task for scheduling. 92 // NOTE this should eventually become the default code path. 93 func (as *APIServer) EndTask(w http.ResponseWriter, r *http.Request) { 94 finishTime := time.Now() 95 96 t := MustHaveTask(r) 97 currentHost := MustHaveHost(r) 98 99 details := &apimodels.TaskEndDetail{} 100 endTaskResp := &apimodels.EndTaskResponse{} 101 if err := util.ReadJSONInto(util.NewRequestReader(r), details); err != nil { 102 http.Error(w, err.Error(), http.StatusBadRequest) 103 return 104 } 105 106 // Check that finishing status is a valid constant 107 if !validateTaskEndDetails(details) { 108 msg := fmt.Errorf("Invalid end status '%v' for task %v", details.Status, t.Id) 109 as.LoggedError(w, r, http.StatusBadRequest, msg) 110 return 111 } 112 113 projectRef, err := model.FindOneProjectRef(t.Project) 114 if err != nil { 115 as.LoggedError(w, r, http.StatusInternalServerError, err) 116 } 117 if projectRef == nil { 118 as.LoggedError(w, r, http.StatusNotFound, fmt.Errorf("empty projectRef for task")) 119 return 120 } 121 122 project, err := model.FindProject("", projectRef) 123 if err != nil { 124 as.LoggedError(w, r, http.StatusInternalServerError, err) 125 return 126 } 127 128 // mark task as finished 129 err = model.MarkEnd(t.Id, 
APIServerLockTitle, finishTime, details, 130 project, projectRef.DeactivatePrevious) 131 if err != nil { 132 message := fmt.Errorf("Error calling mark finish on task %v : %v", t.Id, err) 133 as.LoggedError(w, r, http.StatusInternalServerError, message) 134 return 135 } 136 // the task was aborted if it is still in undispatched. 137 // the active state should be inactive. 138 if details.Status == evergreen.TaskUndispatched { 139 if t.Activated { 140 grip.Warningf("task %v is active and undispatched after being marked as finished", t.Id) 141 return 142 } 143 message := fmt.Sprintf("task %v has been aborted and will not run", t.Id) 144 grip.Infof(message) 145 endTaskResp = &apimodels.EndTaskResponse{ 146 ShouldExit: true, 147 Message: message, 148 } 149 as.WriteJSON(w, http.StatusOK, endTaskResp) 150 return 151 } 152 153 // clear the running task on the host now that the task has finished 154 if err = currentHost.ClearRunningTask(t.Id, time.Now()); err != nil { 155 message := fmt.Errorf("error clearing running task %s for host %s : %v", t.Id, currentHost.Id, err) 156 grip.Errorf(message.Error()) 157 as.LoggedError(w, r, http.StatusInternalServerError, message) 158 return 159 } 160 161 // task cost calculations have no impact on task results, so do them in their own goroutine 162 go as.updateTaskCost(t, currentHost, finishTime) 163 164 if t.Requester != evergreen.PatchVersionRequester { 165 grip.Infoln("Processing alert triggers for task", t.Id) 166 err = alerts.RunTaskFailureTriggers(t.Id) 167 grip.ErrorWhenf(err != nil, "processing alert triggers for task %s: %+v", t.Id, err) 168 } 169 // TODO(EVG-223) process patch-specific triggers 170 171 // update the bookkeeping entry for the task 172 err = bookkeeping.UpdateExpectedDuration(t, t.TimeTaken) 173 if err != nil { 174 grip.Errorln("Error updating expected duration:", err) 175 } 176 taskRunnerInstance := taskrunner.NewTaskRunner(&as.Settings) 177 agentRevision, err := taskRunnerInstance.HostGateway.GetAgentRevision() 
178 if err != nil { 179 grip.Errorf("error getting current agent revision %+v", err) 180 as.WriteJSON(w, http.StatusInternalServerError, err) 181 return 182 } 183 184 shouldExit, message := checkHostHealth(currentHost, agentRevision) 185 if shouldExit { 186 // set the host's last communication time to be zero 187 if err := currentHost.ResetLastCommunicated(); err != nil { 188 grip.Errorf("error resetting last communication time for host %s: %+v", currentHost.Id, err) 189 as.WriteJSON(w, http.StatusInternalServerError, err) 190 return 191 } 192 endTaskResp.ShouldExit = true 193 endTaskResp.Message = message 194 } 195 196 grip.Infof("Successfully marked task %s as finished", t.Id) 197 as.WriteJSON(w, http.StatusOK, endTaskResp) 198 199 } 200 201 // updateTaskCost determines a task's cost based on the host it ran on. Hosts that 202 // are unable to calculate their own costs will not set a task's Cost field. Errors 203 // are logged but not returned, since any number of API failures could happen and 204 // we shouldn't sacrifice a task's status for them. 205 func (as *APIServer) updateTaskCost(t *task.Task, h *host.Host, finishTime time.Time) { 206 manager, err := providers.GetCloudManager(h.Provider, &as.Settings) 207 if err != nil { 208 grip.Errorf("Error loading provider for host %s cost calculation: %+v", t.HostId, err) 209 return 210 } 211 if calc, ok := manager.(cloud.CloudCostCalculator); ok { 212 grip.Infoln("Calculating cost for task:", t.Id) 213 cost, err := calc.CostForDuration(h, t.StartTime, finishTime) 214 if err != nil { 215 grip.Errorf("calculating cost for task %s: %+v ", t.Id, err) 216 return 217 } 218 if err := t.SetCost(cost); err != nil { 219 grip.Errorf("Error updating cost for task %s: %+v ", t.Id, err) 220 return 221 } 222 } 223 } 224 225 // assignNextAvailableTask gets the next task from the queue and sets the running task field 226 // of currentHost. 
227 func assignNextAvailableTask(taskQueue *model.TaskQueue, currentHost *host.Host) (*task.Task, error) { 228 if currentHost.RunningTask != "" { 229 return nil, errors.Errorf("Error host %v must have an unset running task field but has running task %v", 230 currentHost.Id, currentHost.RunningTask) 231 } 232 // only proceed if there are pending tasks left 233 for !taskQueue.IsEmpty() { 234 nextTaskId := taskQueue.NextTask().Id 235 236 nextTask, err := task.FindOne(task.ById(nextTaskId)) 237 if err != nil { 238 return nil, err 239 } 240 if nextTask == nil { 241 return nil, errors.New("nil task on the queue") 242 } 243 244 // dequeue the task from the queue 245 if err = taskQueue.DequeueTask(nextTask.Id); err != nil { 246 return nil, errors.Wrapf(err, 247 "error pulling task with id %v from queue for distro %v", 248 nextTask.Id, nextTask.DistroId) 249 } 250 251 // validate that the task can be run, if not fetch the next one in 252 // the queue. 253 if !nextTask.IsDispatchable() { 254 grip.Warningf("Skipping task %s, which was "+ 255 "picked up to be run but is not runnable - "+ 256 "status (%s) activated (%t)", nextTask.Id, nextTask.Status, 257 nextTask.Activated) 258 continue 259 } 260 // attempt to update the host. TODO: double check Last task completed thing... 261 // TODO: get rid of last task completed field in update running task. 262 ok, err := currentHost.UpdateRunningTask(currentHost.LastTaskCompleted, nextTaskId, time.Now()) 263 264 if err != nil { 265 return nil, errors.WithStack(err) 266 } 267 if !ok { 268 continue 269 } 270 return nextTask, nil 271 } 272 return nil, nil 273 } 274 275 // NextTask retrieves the next task's id given the host name and host secret by retrieving the task queue 276 // and popping the next task off the task queue. 
277 func (as *APIServer) NextTask(w http.ResponseWriter, r *http.Request) { 278 h := MustHaveHost(r) 279 response := apimodels.NextTaskResponse{ 280 ShouldExit: false, 281 } 282 283 taskRunnerInstance := taskrunner.NewTaskRunner(&as.Settings) 284 // check host health before getting next task 285 agentRevision, err := taskRunnerInstance.HostGateway.GetAgentRevision() 286 if err != nil { 287 grip.Errorf("error getting current agent revision %+v", err) 288 as.WriteJSON(w, http.StatusInternalServerError, err) 289 return 290 } 291 292 shouldExit, message := checkHostHealth(h, agentRevision) 293 if shouldExit { 294 // set the host's last communication time to be zero 295 if err = h.ResetLastCommunicated(); err != nil { 296 grip.Errorf("error resetting last communication time for host %s: %+v", h.Id, err) 297 as.WriteJSON(w, http.StatusInternalServerError, err) 298 return 299 } 300 response.ShouldExit = true 301 response.Message = message 302 as.WriteJSON(w, http.StatusOK, response) 303 return 304 } 305 306 // if there is already a task assigned to the host send back that task 307 if h.RunningTask != "" { 308 var t *task.Task 309 t, err = task.FindOne(task.ById(h.RunningTask)) 310 if err != nil { 311 err = errors.WithStack(err) 312 grip.Error(err) 313 as.WriteJSON(w, http.StatusInternalServerError, 314 errors.Wrapf(err, "error getting running task %s", h.RunningTask)) 315 return 316 } 317 318 // if the task can be dispatched and activated dispatch it 319 if t.IsDispatchable() { 320 err = errors.WithStack(model.MarkTaskDispatched(t, h.Id, h.Distro.Id)) 321 if err != nil { 322 grip.Error(err) 323 as.WriteJSON(w, http.StatusInternalServerError, 324 errors.Wrapf(err, "error while marking task %s as dispatched for host %s", t.Id, h.Id)) 325 return 326 } 327 } 328 // if the task is activated return that task 329 if t.Activated { 330 response.TaskId = t.Id 331 response.TaskSecret = t.Secret 332 as.WriteJSON(w, http.StatusOK, response) 333 return 334 } 335 // the task is not 
activated so the host's running task should be unset 336 // so it can retrieve a new task. 337 if err = h.ClearRunningTask(h.LastTaskCompleted, time.Now()); err != nil { 338 err = errors.WithStack(err) 339 grip.Error(err) 340 as.WriteJSON(w, http.StatusInternalServerError, err) 341 return 342 } 343 344 // return an empty 345 grip.Infof("Unset running task field for inactive task %s on host %s", t.Id, h.Id) 346 as.WriteJSON(w, http.StatusOK, response) 347 return 348 } 349 350 // retrieve the next task off the task queue and attempt to assign it to the host. 351 // If there is already a host that has the task, it will error 352 taskQueue, err := model.FindTaskQueueForDistro(h.Distro.Id) 353 if err != nil { 354 err = errors.Wrapf(err, "Error locating distro queue (%v) for host '%v'", h.Distro.Id, h.Id) 355 grip.Error(err) 356 as.WriteJSON(w, http.StatusBadRequest, err) 357 return 358 } 359 if taskQueue == nil { 360 message = fmt.Sprintf("Nil task queue found for task '%v's distro queue - '%v'", 361 h.Id, h.Distro.Id) 362 grip.Info(message) 363 response.Message = message 364 as.WriteJSON(w, http.StatusOK, response) 365 return 366 } 367 // assign the task to a host and retrieve the task 368 nextTask, err := assignNextAvailableTask(taskQueue, h) 369 if err != nil { 370 err = errors.WithStack(err) 371 grip.Error(err) 372 as.WriteJSON(w, http.StatusBadRequest, err) 373 return 374 } 375 if nextTask == nil { 376 // if the task is empty, still send it with an status ok and check it on the other side 377 grip.Infof("no task to assign host %v", h.Id) 378 as.WriteJSON(w, http.StatusOK, response) 379 return 380 } 381 382 // mark the task as dispatched 383 if err := model.MarkTaskDispatched(nextTask, h.Id, h.Distro.Id); err != nil { 384 err = errors.WithStack(err) 385 grip.Error(err) 386 as.WriteJSON(w, http.StatusInternalServerError, err) 387 return 388 } 389 response.TaskId = nextTask.Id 390 response.TaskSecret = nextTask.Secret 391 grip.Infof("assigned task %s to host %s", 
nextTask.Id, h.Id) 392 as.WriteJSON(w, http.StatusOK, response) 393 }