github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/service/api_task.go (about)

     1  package service
     2  
     3  import (
     4  	"fmt"
     5  	"net/http"
     6  	"time"
     7  
     8  	"github.com/evergreen-ci/evergreen"
     9  	"github.com/evergreen-ci/evergreen/alerts"
    10  	"github.com/evergreen-ci/evergreen/apimodels"
    11  	"github.com/evergreen-ci/evergreen/bookkeeping"
    12  	"github.com/evergreen-ci/evergreen/cloud"
    13  	"github.com/evergreen-ci/evergreen/cloud/providers"
    14  	"github.com/evergreen-ci/evergreen/model"
    15  	"github.com/evergreen-ci/evergreen/model/host"
    16  	"github.com/evergreen-ci/evergreen/model/task"
    17  	"github.com/evergreen-ci/evergreen/taskrunner"
    18  	"github.com/evergreen-ci/evergreen/util"
    19  	"github.com/mongodb/grip"
    20  	"github.com/pkg/errors"
    21  )
    22  
    23  // StartTask is the handler function that retrieves the task from the request
    24  // and acquires the global lock
    25  // With the lock, it marks associated tasks, builds, and versions as started.
    26  // It then updates the host document with relevant information, including the pid
    27  // of the agent, and ensures that the host has the running task field set.
    28  func (as *APIServer) StartTask(w http.ResponseWriter, r *http.Request) {
    29  	t := MustHaveTask(r)
    30  
    31  	grip.Infoln("Marking task started:", t.Id)
    32  
    33  	taskStartInfo := &apimodels.TaskStartRequest{}
    34  	if err := util.ReadJSONInto(util.NewRequestReader(r), taskStartInfo); err != nil {
    35  		http.Error(w, fmt.Sprintf("Error reading task start request for %v: %v", t.Id, err), http.StatusBadRequest)
    36  		return
    37  	}
    38  
    39  	if err := model.MarkStart(t.Id); err != nil {
    40  		message := errors.Wrapf(err, "Error marking task '%s' started", t.Id)
    41  		as.LoggedError(w, r, http.StatusInternalServerError, message)
    42  		return
    43  	}
    44  
    45  	h, err := host.FindOne(host.ByRunningTaskId(t.Id))
    46  	if err != nil {
    47  		message := errors.Wrapf(err, "Error finding host running task %s", t.Id)
    48  		as.LoggedError(w, r, http.StatusInternalServerError, message)
    49  		return
    50  	}
    51  
    52  	if h == nil {
    53  		message := errors.Errorf("No host found running task %v", t.Id)
    54  		if t.HostId != "" {
    55  			message = errors.Errorf("No host found running task %s but task is said to be running on %s",
    56  				t.Id, t.HostId)
    57  		}
    58  
    59  		as.LoggedError(w, r, http.StatusInternalServerError, message)
    60  		return
    61  	}
    62  
    63  	as.WriteJSON(w, http.StatusOK, fmt.Sprintf("Task %v started on host %v", t.Id, h.Id))
    64  }
    65  
    66  // validateTaskEndDetails returns true if the task is finished or undispatched
    67  func validateTaskEndDetails(details *apimodels.TaskEndDetail) bool {
    68  	return details.Status == evergreen.TaskSucceeded ||
    69  		details.Status == evergreen.TaskFailed ||
    70  		details.Status == evergreen.TaskUndispatched
    71  }
    72  
    73  // checkHostHealth checks that host is running and creates a task response that is sent back to the agent after the task ends.
    74  func checkHostHealth(h *host.Host, agentRevision string) (bool, string) {
    75  	if h.Status != evergreen.HostRunning {
    76  		return true, fmt.Sprintf("host %s is in state %s and agent should exit",
    77  			h.Id, h.Status)
    78  	}
    79  	if h.AgentRevision != agentRevision {
    80  		return true, fmt.Sprintf("agent should be rebuilt:"+
    81  			"host has agent revision %s and latest revision is %s",
    82  			h.AgentRevision, agentRevision)
    83  	}
    84  	return false, ""
    85  
    86  }
    87  
    88  // EndTask creates test results from the request and the project config.
    89  // It then acquires the lock, and with it, marks tasks as finished or inactive if aborted.
    90  // If the task is a patch, it will alert the users based on failures
    91  // It also updates the expected task duration of the task for scheduling.
    92  // NOTE this should eventually become the default code path.
    93  func (as *APIServer) EndTask(w http.ResponseWriter, r *http.Request) {
    94  	finishTime := time.Now()
    95  
    96  	t := MustHaveTask(r)
    97  	currentHost := MustHaveHost(r)
    98  
    99  	details := &apimodels.TaskEndDetail{}
   100  	endTaskResp := &apimodels.EndTaskResponse{}
   101  	if err := util.ReadJSONInto(util.NewRequestReader(r), details); err != nil {
   102  		http.Error(w, err.Error(), http.StatusBadRequest)
   103  		return
   104  	}
   105  
   106  	// Check that finishing status is a valid constant
   107  	if !validateTaskEndDetails(details) {
   108  		msg := fmt.Errorf("Invalid end status '%v' for task %v", details.Status, t.Id)
   109  		as.LoggedError(w, r, http.StatusBadRequest, msg)
   110  		return
   111  	}
   112  
   113  	projectRef, err := model.FindOneProjectRef(t.Project)
   114  	if err != nil {
   115  		as.LoggedError(w, r, http.StatusInternalServerError, err)
   116  	}
   117  	if projectRef == nil {
   118  		as.LoggedError(w, r, http.StatusNotFound, fmt.Errorf("empty projectRef for task"))
   119  		return
   120  	}
   121  
   122  	project, err := model.FindProject("", projectRef)
   123  	if err != nil {
   124  		as.LoggedError(w, r, http.StatusInternalServerError, err)
   125  		return
   126  	}
   127  
   128  	// mark task as finished
   129  	err = model.MarkEnd(t.Id, APIServerLockTitle, finishTime, details,
   130  		project, projectRef.DeactivatePrevious)
   131  	if err != nil {
   132  		message := fmt.Errorf("Error calling mark finish on task %v : %v", t.Id, err)
   133  		as.LoggedError(w, r, http.StatusInternalServerError, message)
   134  		return
   135  	}
   136  	// the task was aborted if it is still in undispatched.
   137  	// the active state should be inactive.
   138  	if details.Status == evergreen.TaskUndispatched {
   139  		if t.Activated {
   140  			grip.Warningf("task %v is active and undispatched after being marked as finished", t.Id)
   141  			return
   142  		}
   143  		message := fmt.Sprintf("task %v has been aborted and will not run", t.Id)
   144  		grip.Infof(message)
   145  		endTaskResp = &apimodels.EndTaskResponse{
   146  			ShouldExit: true,
   147  			Message:    message,
   148  		}
   149  		as.WriteJSON(w, http.StatusOK, endTaskResp)
   150  		return
   151  	}
   152  
   153  	// clear the running task on the host now that the task has finished
   154  	if err = currentHost.ClearRunningTask(t.Id, time.Now()); err != nil {
   155  		message := fmt.Errorf("error clearing running task %s for host %s : %v", t.Id, currentHost.Id, err)
   156  		grip.Errorf(message.Error())
   157  		as.LoggedError(w, r, http.StatusInternalServerError, message)
   158  		return
   159  	}
   160  
   161  	// task cost calculations have no impact on task results, so do them in their own goroutine
   162  	go as.updateTaskCost(t, currentHost, finishTime)
   163  
   164  	if t.Requester != evergreen.PatchVersionRequester {
   165  		grip.Infoln("Processing alert triggers for task", t.Id)
   166  		err = alerts.RunTaskFailureTriggers(t.Id)
   167  		grip.ErrorWhenf(err != nil, "processing alert triggers for task %s: %+v", t.Id, err)
   168  	}
   169  	// TODO(EVG-223) process patch-specific triggers
   170  
   171  	// update the bookkeeping entry for the task
   172  	err = bookkeeping.UpdateExpectedDuration(t, t.TimeTaken)
   173  	if err != nil {
   174  		grip.Errorln("Error updating expected duration:", err)
   175  	}
   176  	taskRunnerInstance := taskrunner.NewTaskRunner(&as.Settings)
   177  	agentRevision, err := taskRunnerInstance.HostGateway.GetAgentRevision()
   178  	if err != nil {
   179  		grip.Errorf("error getting current agent revision %+v", err)
   180  		as.WriteJSON(w, http.StatusInternalServerError, err)
   181  		return
   182  	}
   183  
   184  	shouldExit, message := checkHostHealth(currentHost, agentRevision)
   185  	if shouldExit {
   186  		// set the host's last communication time to be zero
   187  		if err := currentHost.ResetLastCommunicated(); err != nil {
   188  			grip.Errorf("error resetting last communication time for host %s: %+v", currentHost.Id, err)
   189  			as.WriteJSON(w, http.StatusInternalServerError, err)
   190  			return
   191  		}
   192  		endTaskResp.ShouldExit = true
   193  		endTaskResp.Message = message
   194  	}
   195  
   196  	grip.Infof("Successfully marked task %s as finished", t.Id)
   197  	as.WriteJSON(w, http.StatusOK, endTaskResp)
   198  
   199  }
   200  
   201  // updateTaskCost determines a task's cost based on the host it ran on. Hosts that
   202  // are unable to calculate their own costs will not set a task's Cost field. Errors
   203  // are logged but not returned, since any number of API failures could happen and
   204  // we shouldn't sacrifice a task's status for them.
   205  func (as *APIServer) updateTaskCost(t *task.Task, h *host.Host, finishTime time.Time) {
   206  	manager, err := providers.GetCloudManager(h.Provider, &as.Settings)
   207  	if err != nil {
   208  		grip.Errorf("Error loading provider for host %s cost calculation: %+v", t.HostId, err)
   209  		return
   210  	}
   211  	if calc, ok := manager.(cloud.CloudCostCalculator); ok {
   212  		grip.Infoln("Calculating cost for task:", t.Id)
   213  		cost, err := calc.CostForDuration(h, t.StartTime, finishTime)
   214  		if err != nil {
   215  			grip.Errorf("calculating cost for task %s: %+v ", t.Id, err)
   216  			return
   217  		}
   218  		if err := t.SetCost(cost); err != nil {
   219  			grip.Errorf("Error updating cost for task %s: %+v ", t.Id, err)
   220  			return
   221  		}
   222  	}
   223  }
   224  
   225  // assignNextAvailableTask gets the next task from the queue and sets the running task field
   226  // of currentHost.
   227  func assignNextAvailableTask(taskQueue *model.TaskQueue, currentHost *host.Host) (*task.Task, error) {
   228  	if currentHost.RunningTask != "" {
   229  		return nil, errors.Errorf("Error host %v must have an unset running task field but has running task %v",
   230  			currentHost.Id, currentHost.RunningTask)
   231  	}
   232  	// only proceed if there are pending tasks left
   233  	for !taskQueue.IsEmpty() {
   234  		nextTaskId := taskQueue.NextTask().Id
   235  
   236  		nextTask, err := task.FindOne(task.ById(nextTaskId))
   237  		if err != nil {
   238  			return nil, err
   239  		}
   240  		if nextTask == nil {
   241  			return nil, errors.New("nil task on the queue")
   242  		}
   243  
   244  		// dequeue the task from the queue
   245  		if err = taskQueue.DequeueTask(nextTask.Id); err != nil {
   246  			return nil, errors.Wrapf(err,
   247  				"error pulling task with id %v from queue for distro %v",
   248  				nextTask.Id, nextTask.DistroId)
   249  		}
   250  
   251  		// validate that the task can be run, if not fetch the next one in
   252  		// the queue.
   253  		if !nextTask.IsDispatchable() {
   254  			grip.Warningf("Skipping task %s, which was "+
   255  				"picked up to be run but is not runnable - "+
   256  				"status (%s) activated (%t)", nextTask.Id, nextTask.Status,
   257  				nextTask.Activated)
   258  			continue
   259  		}
   260  		// attempt to update the host. TODO: double check Last task completed thing...
   261  		// TODO: get rid of last task completed field in update running task.
   262  		ok, err := currentHost.UpdateRunningTask(currentHost.LastTaskCompleted, nextTaskId, time.Now())
   263  
   264  		if err != nil {
   265  			return nil, errors.WithStack(err)
   266  		}
   267  		if !ok {
   268  			continue
   269  		}
   270  		return nextTask, nil
   271  	}
   272  	return nil, nil
   273  }
   274  
   275  // NextTask retrieves the next task's id given the host name and host secret by retrieving the task queue
   276  // and popping the next task off the task queue.
   277  func (as *APIServer) NextTask(w http.ResponseWriter, r *http.Request) {
   278  	h := MustHaveHost(r)
   279  	response := apimodels.NextTaskResponse{
   280  		ShouldExit: false,
   281  	}
   282  
   283  	taskRunnerInstance := taskrunner.NewTaskRunner(&as.Settings)
   284  	// check host health before getting next task
   285  	agentRevision, err := taskRunnerInstance.HostGateway.GetAgentRevision()
   286  	if err != nil {
   287  		grip.Errorf("error getting current agent revision %+v", err)
   288  		as.WriteJSON(w, http.StatusInternalServerError, err)
   289  		return
   290  	}
   291  
   292  	shouldExit, message := checkHostHealth(h, agentRevision)
   293  	if shouldExit {
   294  		// set the host's last communication time to be zero
   295  		if err = h.ResetLastCommunicated(); err != nil {
   296  			grip.Errorf("error resetting last communication time for host %s: %+v", h.Id, err)
   297  			as.WriteJSON(w, http.StatusInternalServerError, err)
   298  			return
   299  		}
   300  		response.ShouldExit = true
   301  		response.Message = message
   302  		as.WriteJSON(w, http.StatusOK, response)
   303  		return
   304  	}
   305  
   306  	// if there is already a task assigned to the host send back that task
   307  	if h.RunningTask != "" {
   308  		var t *task.Task
   309  		t, err = task.FindOne(task.ById(h.RunningTask))
   310  		if err != nil {
   311  			err = errors.WithStack(err)
   312  			grip.Error(err)
   313  			as.WriteJSON(w, http.StatusInternalServerError,
   314  				errors.Wrapf(err, "error getting running task %s", h.RunningTask))
   315  			return
   316  		}
   317  
   318  		// if the task can be dispatched and activated dispatch it
   319  		if t.IsDispatchable() {
   320  			err = errors.WithStack(model.MarkTaskDispatched(t, h.Id, h.Distro.Id))
   321  			if err != nil {
   322  				grip.Error(err)
   323  				as.WriteJSON(w, http.StatusInternalServerError,
   324  					errors.Wrapf(err, "error while marking task %s as dispatched for host %s", t.Id, h.Id))
   325  				return
   326  			}
   327  		}
   328  		// if the task is activated return that task
   329  		if t.Activated {
   330  			response.TaskId = t.Id
   331  			response.TaskSecret = t.Secret
   332  			as.WriteJSON(w, http.StatusOK, response)
   333  			return
   334  		}
   335  		// the task is not activated so the host's running task should be unset
   336  		// so it can retrieve a new task.
   337  		if err = h.ClearRunningTask(h.LastTaskCompleted, time.Now()); err != nil {
   338  			err = errors.WithStack(err)
   339  			grip.Error(err)
   340  			as.WriteJSON(w, http.StatusInternalServerError, err)
   341  			return
   342  		}
   343  
   344  		// return an empty
   345  		grip.Infof("Unset running task field for inactive task %s on host %s", t.Id, h.Id)
   346  		as.WriteJSON(w, http.StatusOK, response)
   347  		return
   348  	}
   349  
   350  	// retrieve the next task off the task queue and attempt to assign it to the host.
   351  	// If there is already a host that has the task, it will error
   352  	taskQueue, err := model.FindTaskQueueForDistro(h.Distro.Id)
   353  	if err != nil {
   354  		err = errors.Wrapf(err, "Error locating distro queue (%v) for host '%v'", h.Distro.Id, h.Id)
   355  		grip.Error(err)
   356  		as.WriteJSON(w, http.StatusBadRequest, err)
   357  		return
   358  	}
   359  	if taskQueue == nil {
   360  		message = fmt.Sprintf("Nil task queue found for task '%v's distro queue - '%v'",
   361  			h.Id, h.Distro.Id)
   362  		grip.Info(message)
   363  		response.Message = message
   364  		as.WriteJSON(w, http.StatusOK, response)
   365  		return
   366  	}
   367  	// assign the task to a host and retrieve the task
   368  	nextTask, err := assignNextAvailableTask(taskQueue, h)
   369  	if err != nil {
   370  		err = errors.WithStack(err)
   371  		grip.Error(err)
   372  		as.WriteJSON(w, http.StatusBadRequest, err)
   373  		return
   374  	}
   375  	if nextTask == nil {
   376  		// if the task is empty, still send it with an status ok and check it on the other side
   377  		grip.Infof("no task to assign host %v", h.Id)
   378  		as.WriteJSON(w, http.StatusOK, response)
   379  		return
   380  	}
   381  
   382  	// mark the task as dispatched
   383  	if err := model.MarkTaskDispatched(nextTask, h.Id, h.Distro.Id); err != nil {
   384  		err = errors.WithStack(err)
   385  		grip.Error(err)
   386  		as.WriteJSON(w, http.StatusInternalServerError, err)
   387  		return
   388  	}
   389  	response.TaskId = nextTask.Id
   390  	response.TaskSecret = nextTask.Secret
   391  	grip.Infof("assigned task %s to host %s", nextTask.Id, h.Id)
   392  	as.WriteJSON(w, http.StatusOK, response)
   393  }