github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/alerts/project_triggers.go (about)

     1  package alerts
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/evergreen-ci/evergreen"
     7  	"github.com/evergreen-ci/evergreen/model"
     8  	"github.com/evergreen-ci/evergreen/model/alertrecord"
     9  	"github.com/evergreen-ci/evergreen/model/task"
    10  	"github.com/pkg/errors"
    11  )
    12  
    13  /* Task trigger Implementations */
    14  
    15  // TaskFailed is a trigger that queues an alert whenever a task fails, regardless of any alerts
    16  // generated by previous runs of the task or other tasks within the version/variant/task type.
    17  type TaskFailed struct{}
    18  
    19  func (tf TaskFailed) Id() string        { return alertrecord.TaskFailedId }
    20  func (trig TaskFailed) Display() string { return "any task fails" }
    21  
    22  func (trig TaskFailed) ShouldExecute(ctx triggerContext) (bool, error) {
    23  	if ctx.task.Status != evergreen.TaskFailed {
    24  		return false, nil
    25  	}
    26  	return true, nil
    27  }
    28  
    29  func (trig TaskFailed) CreateAlertRecord(_ triggerContext) *alertrecord.AlertRecord { return nil }
    30  
    31  // FirstFailureInVersion is a trigger that queues an alert whenever a task fails for the first time
    32  // within a version. After one failure has triggered an alert for this event, subsequent failures
    33  // will not trigger additional alerts.
    34  type FirstFailureInVersion struct{}
    35  
    36  func (trig FirstFailureInVersion) Id() string      { return alertrecord.FirstVersionFailureId }
    37  func (trig FirstFailureInVersion) Display() string { return "the first task failure occurs" }
    38  func (trig FirstFailureInVersion) CreateAlertRecord(ctx triggerContext) *alertrecord.AlertRecord {
    39  	return newAlertRecord(ctx, alertrecord.FirstVersionFailureId)
    40  }
    41  
    42  func (trig FirstFailureInVersion) ShouldExecute(ctx triggerContext) (bool, error) {
    43  	if ctx.task.Status != evergreen.TaskFailed {
    44  		return false, nil
    45  	}
    46  	rec, err := alertrecord.FindOne(alertrecord.ByFirstFailureInVersion(ctx.task.Project, ctx.task.Version))
    47  	if err != nil {
    48  		return false, err
    49  	}
    50  	return rec == nil, nil
    51  }
    52  
    53  // FirstFailureInVersion is a trigger that queues an alert whenever a task fails for the first time
    54  // within a variant. After one failure has triggered an alert for this event, subsequent failures
    55  // will not trigger additional alerts.
    56  type FirstFailureInVariant struct{}
    57  
    58  func (trig FirstFailureInVariant) Id() string { return alertrecord.FirstVariantFailureId }
    59  func (trig FirstFailureInVariant) Display() string {
    60  	return "the first failure within each variant occurs"
    61  }
    62  func (trig FirstFailureInVariant) CreateAlertRecord(ctx triggerContext) *alertrecord.AlertRecord {
    63  	return newAlertRecord(ctx, alertrecord.FirstVariantFailureId)
    64  }
    65  func (trig FirstFailureInVariant) ShouldExecute(ctx triggerContext) (bool, error) {
    66  	if ctx.task.Status != evergreen.TaskFailed {
    67  		return false, nil
    68  	}
    69  	rec, err := alertrecord.FindOne(alertrecord.ByFirstFailureInVariant(ctx.task.Version, ctx.task.BuildVariant))
    70  	if err != nil {
    71  		return false, nil
    72  	}
    73  	return rec == nil, nil
    74  }
    75  
    76  // FirstFailureInVersion is a trigger that queues an alert whenever a task fails for the first time
    77  // for a task of a given name within a version. For example:
    78  // "compile" fails on linux-64;   ShouldExecute returns true
    79  // "compile" fails on windows;    ShouldExecute returns false because one was already sent for compile.
    80  // "unit-tests" fails on windows; ShouldExecute returns true because nothing was sent yet for unit-tests.
    81  // "unit-tests" fails on linux-64; ShouldExecute returns false
    82  type FirstFailureInTaskType struct{}
    83  
    84  func (trig FirstFailureInTaskType) Id() string { return alertrecord.FirstTaskTypeFailureId }
    85  func (trig FirstFailureInTaskType) Display() string {
    86  	return "the first failure for each task name occurs"
    87  }
    88  func (trig FirstFailureInTaskType) CreateAlertRecord(ctx triggerContext) *alertrecord.AlertRecord {
    89  	return newAlertRecord(ctx, alertrecord.FirstTaskTypeFailureId)
    90  }
    91  func (trig FirstFailureInTaskType) ShouldExecute(ctx triggerContext) (bool, error) {
    92  	if ctx.task.Status != evergreen.TaskFailed {
    93  		return false, nil
    94  	}
    95  	rec, err := alertrecord.FindOne(alertrecord.ByFirstFailureInTaskType(ctx.task.Version, ctx.task.DisplayName))
    96  	if err != nil {
    97  		return false, nil
    98  	}
    99  	return rec == nil, nil
   100  }
   101  
   102  // TaskFailTransition is a trigger that queues an alert iff the following conditions are met:
   103  // 1) A task fails and the previous completion of this task on the same variant was passing or
   104  // the task has never run before
   105  // 2) The most recent alert for this trigger, if existing, was stored when the 'last passing task'
   106  // at the time was older than the 'last passing task' for the newly failed task.
   107  // 3) The previous run was a failure, and there has been Multipler*Batchtime time since
   108  // the previous alert was sent.
   109  type TaskFailTransition struct{}
   110  
   111  // failureLimitMultiplier is a magic scalar for determining how often to resend transition failures.
   112  // If a failure reoccurs after 3*batchTime amount of time, we will resend transition emails.
   113  const failureLimitMultiplier = 3
   114  
   115  func (trig TaskFailTransition) Id() string { return alertrecord.TaskFailTransitionId }
   116  func (trig TaskFailTransition) Display() string {
   117  	return "a previously passing task fails"
   118  }
   119  func (trig TaskFailTransition) ShouldExecute(ctx triggerContext) (bool, error) {
   120  	if ctx.task.Status != evergreen.TaskFailed {
   121  		return false, nil
   122  	}
   123  	if ctx.previousCompleted == nil {
   124  		return true, nil
   125  	}
   126  	if ctx.previousCompleted.Status == evergreen.TaskSucceeded {
   127  		// the task transitioned to failure - but we will only trigger an alert if we haven't recorded
   128  		// a sent alert for a transition after the same previously passing task.
   129  		q := alertrecord.ByLastFailureTransition(ctx.task.DisplayName, ctx.task.BuildVariant, ctx.task.Project)
   130  		lastAlerted, err := alertrecord.FindOne(q)
   131  		if err != nil {
   132  			return false, err
   133  		}
   134  
   135  		if lastAlerted == nil || (lastAlerted.RevisionOrderNumber < ctx.previousCompleted.RevisionOrderNumber) {
   136  			// Either this alert has never been triggered before, or it was triggered for a
   137  			// transition from failure after an older success than this one - so we need to
   138  			// execute this trigger again.
   139  			return true, nil
   140  		}
   141  	}
   142  	if ctx.previousCompleted.Status == evergreen.TaskFailed {
   143  		// check if enough time has passed since our last transition alert
   144  		q := alertrecord.ByLastFailureTransition(ctx.task.DisplayName, ctx.task.BuildVariant, ctx.task.Project)
   145  		lastAlerted, err := alertrecord.FindOne(q)
   146  		if err != nil {
   147  			return false, err
   148  		}
   149  		if lastAlerted == nil || lastAlerted.TaskId == "" {
   150  			return false, nil
   151  		}
   152  		return reachedFailureLimit(lastAlerted.TaskId)
   153  	}
   154  	return false, nil
   155  }
   156  
   157  func (trig TaskFailTransition) CreateAlertRecord(ctx triggerContext) *alertrecord.AlertRecord {
   158  	rec := newAlertRecord(ctx, alertrecord.TaskFailTransitionId)
   159  	// For pass/fail transition bookkeeping, we store the revision order number of the
   160  	// previous (passing) task, not the currently passing task.
   161  	rec.RevisionOrderNumber = -1
   162  	if ctx.previousCompleted != nil {
   163  		rec.RevisionOrderNumber = ctx.previousCompleted.RevisionOrderNumber
   164  	}
   165  	return rec
   166  }
   167  
   168  // reachedFailureLimit returns true if task for the previous failure transition alert
   169  // happened too long ago, as determined by some magic math.
   170  func reachedFailureLimit(taskId string) (bool, error) {
   171  	t, err := task.FindOne(task.ById(taskId))
   172  	if err != nil {
   173  		return false, err
   174  	}
   175  	if t == nil {
   176  		return false, errors.Errorf("task %s not found", taskId)
   177  	}
   178  	pr, err := model.FindOneProjectRef(t.Project)
   179  	if err != nil {
   180  		return false, err
   181  	}
   182  	if pr == nil {
   183  		return false, errors.Errorf("project ref %s not found", t.Project)
   184  	}
   185  	p, err := model.FindProject(t.Revision, pr)
   186  	if err != nil {
   187  		return false, err
   188  	}
   189  	if p == nil {
   190  		return false, errors.Errorf("project %v not found for revision %v", t.Project, t.Revision)
   191  	}
   192  	v := p.FindBuildVariant(t.BuildVariant)
   193  	if v == nil {
   194  		return false, errors.Errorf("build variant %v does not exist in project", t.BuildVariant)
   195  	}
   196  	batchTime := pr.GetBatchTime(v)
   197  	reached := time.Since(t.FinishTime) > (time.Duration(batchTime) * time.Minute * failureLimitMultiplier)
   198  	return reached, nil
   199  
   200  }
   201  
   202  type LastRevisionNotFound struct{}
   203  
   204  func (lrnf LastRevisionNotFound) Id() string      { return alertrecord.TaskFailedId }
   205  func (lrnf LastRevisionNotFound) Display() string { return "any task fails" }
   206  
   207  func (lrnf LastRevisionNotFound) ShouldExecute(ctx triggerContext) (bool, error) {
   208  	if ctx.task.Status != evergreen.TaskFailed {
   209  		return false, nil
   210  	}
   211  	return true, nil
   212  }
   213  
   214  func (lrnf LastRevisionNotFound) CreateAlertRecord(ctx triggerContext) *alertrecord.AlertRecord {
   215  	rec := newAlertRecord(ctx, alertrecord.LastRevisionNotFound)
   216  	return rec
   217  }