go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/engine/triage_test.go (about)

     1  // Copyright 2017 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package engine
    16  
    17  import (
    18  	"context"
    19  	"strings"
    20  	"testing"
    21  	"time"
    22  
    23  	"google.golang.org/protobuf/types/known/timestamppb"
    24  
    25  	"go.chromium.org/luci/gae/filter/featureBreaker"
    26  	"go.chromium.org/luci/gae/service/datastore"
    27  
    28  	"go.chromium.org/luci/common/clock"
    29  	"go.chromium.org/luci/common/clock/testclock"
    30  	"go.chromium.org/luci/scheduler/appengine/engine/policy"
    31  	"go.chromium.org/luci/scheduler/appengine/internal"
    32  	"go.chromium.org/luci/scheduler/appengine/messages"
    33  	"go.chromium.org/luci/scheduler/appengine/task"
    34  
    35  	. "github.com/smartystreets/goconvey/convey"
    36  
    37  	. "go.chromium.org/luci/common/testing/assertions"
    38  )
    39  
    40  func TestTriageOp(t *testing.T) {
    41  	t.Parallel()
    42  
    43  	Convey("with fake env", t, func() {
    44  		c := newTestContext(epoch)
    45  		tb := triageTestBed{maxAllowedTriggers: 1000}
    46  
    47  		Convey("noop triage", func() {
    48  			before := &Job{
    49  				JobID:             "job",
    50  				Enabled:           true,
    51  				ActiveInvocations: []int64{1, 2, 3},
    52  			}
    53  			after, err := tb.runTestTriage(c, before)
    54  			So(err, ShouldBeNil)
    55  
    56  			// Only LastTriage timestamp is bumped.
    57  			expected := *before
    58  			expected.LastTriage = epoch
    59  			So(after, ShouldResemble, &expected)
    60  
    61  			// Have some triage log.
    62  			job := JobTriageLog{JobID: "job"}
    63  			So(datastore.Get(c, &job), ShouldBeNil)
    64  			So(job, ShouldResemble, JobTriageLog{
    65  				JobID:      "job",
    66  				LastTriage: epoch,
    67  				DebugLog: strings.Join([]string{
    68  					"[000 ms] Starting",
    69  					"[000 ms] Pending triggers set:  0 items, 0 garbage",
    70  					"[000 ms] Recently finished set: 0 items, 0 garbage",
    71  					"[000 ms] The preparation is finished",
    72  					"[000 ms] Started the transaction",
    73  					"[000 ms] Number of active invocations: 3",
    74  					"[000 ms] Number of recently finished:  0",
    75  					"[000 ms] Triggers available in this txn: 0",
    76  					"[000 ms] Invoking the triggering policy function",
    77  					"[000 ms] The policy requested 0 new invocations",
    78  					"[000 ms] Removing consumed dsset items",
    79  					"[000 ms] Landing the transaction",
    80  					"[000 ms] Done",
    81  					"",
    82  				}, "\n"),
    83  			})
    84  		})
    85  
    86  		Convey("pops finished invocations", func() {
    87  			before := &Job{
    88  				JobID:             "job",
    89  				Enabled:           true,
    90  				ActiveInvocations: []int64{1, 2, 3, 4, 5},
    91  			}
    92  			recentlyFinishedSet(c, before.JobID).Add(c, []int64{1, 2, 4})
    93  
    94  			// We record the triage time inside FinishedInvocation.Finished, not when
    95  			// the invocations was actually finished. Usually it shouldn't matter,
    96  			// since the difference is only a couple of seconds (due to TQ delays).
    97  			clock.Get(c).(testclock.TestClock).Add(15 * time.Second)
    98  			expectedFinishedTS := epoch.Add(15 * time.Second)
    99  
   100  			after, err := tb.runTestTriage(c, before)
   101  			So(err, ShouldBeNil)
   102  			So(after.ActiveInvocations, ShouldResemble, []int64{3, 5})
   103  
   104  			// FinishedInvocationsRaw has the finished invocations.
   105  			finishedRaw := after.FinishedInvocationsRaw
   106  			finished, err := unmarshalFinishedInvs(finishedRaw)
   107  			So(err, ShouldBeNil)
   108  			So(finished, ShouldResemble, []*internal.FinishedInvocation{
   109  				{InvocationId: 1, Finished: timestamppb.New(expectedFinishedTS)},
   110  				{InvocationId: 2, Finished: timestamppb.New(expectedFinishedTS)},
   111  				{InvocationId: 4, Finished: timestamppb.New(expectedFinishedTS)},
   112  			})
   113  
   114  			// Some time later, they are still there.
   115  			clock.Get(c).(testclock.TestClock).Add(FinishedInvocationsHorizon - time.Second)
   116  			after, err = tb.runTestTriage(c, after)
   117  			So(err, ShouldBeNil)
   118  			So(after.FinishedInvocationsRaw, ShouldResemble, finishedRaw)
   119  
   120  			// But eventually the get kicked out.
   121  			clock.Get(c).(testclock.TestClock).Add(2 * time.Second)
   122  			after, err = tb.runTestTriage(c, after)
   123  			So(err, ShouldBeNil)
   124  			So(after.FinishedInvocationsRaw, ShouldBeNil)
   125  		})
   126  
   127  		Convey("processes triggers", func() {
   128  			triggers := []*internal.Trigger{
   129  				{
   130  					Id:      "t0",
   131  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   132  				},
   133  				{
   134  					Id:      "t1",
   135  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   136  				},
   137  				{
   138  					Id:      "t2",
   139  					Created: timestamppb.New(epoch.Add(10 * time.Second)),
   140  				},
   141  				{
   142  					Id:      "a_t3", // to make its ID be before t0 and t1
   143  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   144  				},
   145  			}
   146  
   147  			before := &Job{
   148  				JobID:   "job",
   149  				Enabled: true,
   150  			}
   151  			pendingTriggersSet(c, before.JobID).Add(c, triggers)
   152  
   153  			// Cycle 1. Pops triggers, converts them into a bunch of invocations.
   154  			// Triggers are in sorted order!
   155  			after, err := tb.runTestTriage(c, before)
   156  			So(err, ShouldBeNil)
   157  			So(after.ActiveInvocations, ShouldResemble, []int64{1, 2, 3, 4})
   158  			So(tb.requests, ShouldHaveLength, 4)
   159  			So(tb.requests[0].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[2]})
   160  			So(tb.requests[1].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[3]})
   161  			So(tb.requests[2].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[0]})
   162  			So(tb.requests[3].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[1]})
   163  			tb.requests = nil
   164  
   165  			// Cycle 2. Nothing new.
   166  			after, err = tb.runTestTriage(c, after)
   167  			So(err, ShouldBeNil)
   168  			So(after.ActiveInvocations, ShouldResemble, []int64{1, 2, 3, 4})
   169  			So(tb.requests, ShouldBeNil)
   170  		})
   171  
   172  		Convey("ignores triggers if paused", func() {
   173  			triggers := []*internal.Trigger{
   174  				{
   175  					Id:      "t0",
   176  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   177  				},
   178  				{
   179  					Id:      "t1",
   180  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   181  				},
   182  			}
   183  
   184  			pendingTriggersSet(c, "job").Add(c, triggers)
   185  
   186  			// Just pops and discards triggers without starting anything.
   187  			after, err := tb.runTestTriage(c, &Job{
   188  				JobID:   "job",
   189  				Enabled: true,
   190  				Paused:  true,
   191  			})
   192  			So(err, ShouldBeNil)
   193  			So(after.ActiveInvocations, ShouldHaveLength, 0)
   194  			So(tb.requests, ShouldHaveLength, 0)
   195  
   196  			// No triggers there anymore.
   197  			listing, err := pendingTriggersSet(c, "job").List(c)
   198  			So(err, ShouldBeNil)
   199  			So(listing.Items, ShouldHaveLength, 0)
   200  		})
   201  
   202  		Convey("drops excessive triggers", func() {
   203  			tb.maxAllowedTriggers = 1
   204  
   205  			triggers := []*internal.Trigger{
   206  				{
   207  					Id:      "most-recent",
   208  					Created: timestamppb.New(epoch.Add(3 * time.Second)),
   209  				},
   210  				{
   211  					Id:      "dropped-1",
   212  					Created: timestamppb.New(epoch.Add(2 * time.Second)),
   213  				},
   214  				{
   215  					Id:      "dropped-2",
   216  					Created: timestamppb.New(epoch.Add(1 * time.Second)),
   217  				},
   218  			}
   219  
   220  			pendingTriggersSet(c, "job").Add(c, triggers)
   221  
   222  			// Only the most recent trigger will be kept alive and converted into
   223  			// an invocation.
   224  			after, err := tb.runTestTriage(c, &Job{
   225  				JobID:   "job",
   226  				Enabled: true,
   227  			})
   228  			So(err, ShouldBeNil)
   229  			So(after.ActiveInvocations, ShouldHaveLength, 1)
   230  			So(tb.requests, ShouldHaveLength, 1)
   231  			So(tb.requests[0].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[0]})
   232  
   233  			// No triggers there anymore.
   234  			listing, err := pendingTriggersSet(c, "job").List(c)
   235  			So(err, ShouldBeNil)
   236  			So(listing.Items, ShouldHaveLength, 0)
   237  		})
   238  
   239  		Convey("doesn't touch ds sets if txn fails to land", func() {
   240  			triggers := []*internal.Trigger{
   241  				{
   242  					Id:      "t0",
   243  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   244  				},
   245  				{
   246  					Id:      "t1",
   247  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   248  				},
   249  			}
   250  			pendingTriggersSet(c, "job").Add(c, triggers)
   251  
   252  			brokenC, fb := featureBreaker.FilterRDS(c, nil)
   253  			fb.BreakFeatures(nil, "CommitTransaction")
   254  
   255  			after, err := tb.runTestTriage(brokenC, &Job{
   256  				JobID:   "job",
   257  				Enabled: true,
   258  			})
   259  			So(err, ShouldNotBeNil)
   260  			So(after.ActiveInvocations, ShouldHaveLength, 0)
   261  
   262  			// Triggers are still there.
   263  			listing, err := pendingTriggersSet(c, "job").List(c)
   264  			So(err, ShouldBeNil)
   265  			So(listing.Items, ShouldHaveLength, 2)
   266  		})
   267  
   268  		Convey("discards triggers", func() {
   269  			tb.policyDiscardsTriggers = true
   270  
   271  			triggers := []*internal.Trigger{
   272  				{
   273  					Id:      "t0",
   274  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   275  				},
   276  				{
   277  					Id:      "t1",
   278  					Created: timestamppb.New(epoch.Add(20 * time.Second)),
   279  				},
   280  			}
   281  
   282  			before := &Job{
   283  				JobID:    "job",
   284  				Schedule: "with 10m interval", // This needs to be set, since the engine will poke cron.
   285  				Enabled:  true,
   286  			}
   287  			pendingTriggersSet(c, before.JobID).Add(c, triggers)
   288  
   289  			// Cycle 1. Pops triggers and discards them.
   290  			after, err := tb.runTestTriage(c, before)
   291  			So(err, ShouldBeNil)
   292  			So(after.ActiveInvocations, ShouldHaveLength, 0)
   293  			So(tb.requests, ShouldHaveLength, 0)
   294  
   295  			// Cycle 2. Nothing new.
   296  			after, err = tb.runTestTriage(c, after)
   297  			So(err, ShouldBeNil)
   298  			So(after.ActiveInvocations, ShouldHaveLength, 0)
   299  			So(tb.requests, ShouldHaveLength, 0)
   300  		})
   301  	})
   302  }
   303  
   304  type triageTestBed struct {
   305  	// Inputs.
   306  	maxAllowedTriggers     int
   307  	policyDiscardsTriggers bool
   308  
   309  	// Outputs.
   310  	nextInvID int64
   311  	requests  []task.Request
   312  }
   313  
   314  // runTestTriage runs the triage operation and refetches the job after it.
   315  //
   316  // On triage errors, both 'after' and 'err' are returned.
   317  func (t *triageTestBed) runTestTriage(c context.Context, before *Job) (after *Job, err error) {
   318  	if err = datastore.Put(c, before); err != nil {
   319  		return nil, err
   320  	}
   321  
   322  	op := triageOp{
   323  		jobID: before.JobID,
   324  		policyFactory: func(*messages.TriggeringPolicy) (policy.Func, error) {
   325  			return func(env policy.Environment, in policy.In) (out policy.Out) {
   326  				if t.policyDiscardsTriggers {
   327  					out.Discard = append(out.Discard, in.Triggers...)
   328  				} else {
   329  					for _, t := range in.Triggers {
   330  						out.Requests = append(out.Requests, task.Request{
   331  							IncomingTriggers: []*internal.Trigger{t},
   332  						})
   333  					}
   334  				}
   335  				return
   336  			}, nil
   337  		},
   338  		enqueueInvocations: func(c context.Context, job *Job, req []task.Request) error {
   339  			t.requests = append(t.requests, req...)
   340  			for range req {
   341  				t.nextInvID++
   342  				job.ActiveInvocations = append(job.ActiveInvocations, t.nextInvID)
   343  			}
   344  			return nil
   345  		},
   346  		maxAllowedTriggers: t.maxAllowedTriggers,
   347  	}
   348  
   349  	if err = op.prepare(c); err == nil {
   350  		err = runTxn(c, func(c context.Context) error {
   351  			job := Job{JobID: op.jobID}
   352  			if err := datastore.Get(c, &job); err != nil {
   353  				return err
   354  			}
   355  			if err := op.transaction(c, &job); err != nil {
   356  				return err
   357  			}
   358  			return datastore.Put(c, &job)
   359  		})
   360  	}
   361  	op.finalize(c, err == nil)
   362  
   363  	after = &Job{JobID: before.JobID}
   364  	if getErr := datastore.Get(c, after); getErr != nil {
   365  		panic("must not happen")
   366  	}
   367  	return after, err
   368  }