github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/monitor/tasks.go (about)

     1  package monitor
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/evergreen-ci/evergreen"
     7  	"github.com/evergreen-ci/evergreen/apimodels"
     8  	"github.com/evergreen-ci/evergreen/model"
     9  	"github.com/evergreen-ci/evergreen/model/host"
    10  	"github.com/evergreen-ci/evergreen/model/task"
    11  	"github.com/mongodb/grip"
    12  	"github.com/pkg/errors"
    13  )
    14  
    15  // TaskMonitor is responsible for cleaning up any tasks that need to be stopped.
    16  type TaskMonitor struct {
    17  	// flaggingFuncs are called one by one by CleanupTasks; each returns a batch of tasks that need to be cleaned up
    18  	flaggingFuncs []taskFlaggingFunc
    19  }
    20  
    21  // run through the list of task flagging functions, finding all tasks that
    22  // need to be cleaned up and taking appropriate action. takes in a map
    23  // of project name -> project info
    24  func (tm *TaskMonitor) CleanupTasks(projects map[string]model.Project) []error {
    25  	grip.Info("Cleaning up tasks...")
    26  
    27  	// used to store any errors that occur
    28  	var errs []error
    29  
    30  	for _, f := range tm.flaggingFuncs {
    31  		// find the next batch of tasks to be cleaned up
    32  		tasksToCleanUp, err := f()
    33  
    34  		// continue on error so that one wonky flagging function doesn't
    35  		// stop others from working
    36  		if err != nil {
    37  			errs = append(errs, errors.Wrap(err, "error finding tasks to be cleaned up"))
    38  			continue
    39  		}
    40  
    41  		// clean up all of the tasks. continue on error to allow further cleanup
    42  		// to progress
    43  		if errs = cleanUpTasks(tasksToCleanUp, projects); errs != nil {
    44  			for _, err := range errs {
    45  				errs = append(errs, errors.Wrap(err, "error cleaning up tasks"))
    46  			}
    47  		}
    48  	}
    49  
    50  	grip.Info("Done cleaning up tasks")
    51  
    52  	return errs
    53  }
    54  
    55  // clean up the passed-in slice of tasks
    56  func cleanUpTasks(taskWrappers []doomedTaskWrapper, projects map[string]model.Project) []error {
    57  	grip.Infof("Cleaning up %d tasks...", len(taskWrappers))
    58  
    59  	// used to store any errors that occur
    60  	var errs []error
    61  
    62  	for _, wrapper := range taskWrappers {
    63  		grip.Infof("Cleaning up task %s, for reason '%s'", wrapper.task.Id, wrapper.reason)
    64  
    65  		// clean up the task. continue on error to let others be cleaned up
    66  		if err := cleanUpTask(wrapper, projects); err != nil {
    67  			errs = append(errs, errors.Wrapf(err,
    68  				"error cleaning up task %v", wrapper.task.Id))
    69  			continue
    70  		}
    71  		grip.Infoln("Successfully cleaned up task", wrapper.task.Id)
    72  	}
    73  
    74  	return errs
    75  }
    76  
    77  // function to clean up a single task
    78  func cleanUpTask(wrapper doomedTaskWrapper, projects map[string]model.Project) error {
    79  
    80  	// find the appropriate project for the task
    81  	project, ok := projects[wrapper.task.Project]
    82  	if !ok {
    83  		return errors.Errorf("could not find project %v for task %v",
    84  			wrapper.task.Project, wrapper.task.Id)
    85  	}
    86  
    87  	// get the host for the task
    88  	host, err := host.FindOne(host.ById(wrapper.task.HostId))
    89  	if err != nil {
    90  		return errors.Wrapf(err, "error finding host %s for task %s",
    91  			wrapper.task.HostId, wrapper.task.Id)
    92  	}
    93  
    94  	// if there's no relevant host, something went wrong
    95  	if host == nil {
    96  		grip.Errorln("no entry found for host:", wrapper.task.HostId)
    97  		return errors.WithStack(wrapper.task.MarkUnscheduled())
    98  	}
    99  
   100  	// if the host still has the task as its running task, clear it.
   101  	if host.RunningTask == wrapper.task.Id {
   102  		// clear out the host's running task
   103  		if err = host.ClearRunningTask(wrapper.task.Id, time.Now()); err != nil {
   104  			return errors.Wrapf(err, "error clearing running task %v from host %v: %v",
   105  				wrapper.task.Id, host.Id)
   106  		}
   107  	}
   108  
   109  	// take different action, depending on the type of task death
   110  	switch wrapper.reason {
   111  	case HeartbeatTimeout:
   112  		err = cleanUpTimedOutHeartbeat(wrapper.task, project)
   113  	default:
   114  		return errors.Errorf("unknown reason for cleaning up task: %v", wrapper.reason)
   115  	}
   116  
   117  	if err != nil {
   118  		return errors.Wrapf(err, "error cleaning up task %s", wrapper.task.Id)
   119  	}
   120  
   121  	return nil
   122  
   123  }
   124  
   125  // clean up a task whose heartbeat has timed out
   126  func cleanUpTimedOutHeartbeat(t task.Task, project model.Project) error {
   127  	// mock up the failure details of the task
   128  	detail := &apimodels.TaskEndDetail{
   129  		Description: task.AgentHeartbeat,
   130  		TimedOut:    true,
   131  		Status:      evergreen.TaskFailed,
   132  	}
   133  
   134  	// try to reset the task
   135  	if err := model.TryResetTask(t.Id, "", RunnerName, &project, detail); err != nil {
   136  		return errors.Wrapf(err, "error trying to reset task %s", t.Id)
   137  	}
   138  	// success
   139  	return nil
   140  }