go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/engine/triage_test.go (about) 1 // Copyright 2017 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package engine 16 17 import ( 18 "context" 19 "strings" 20 "testing" 21 "time" 22 23 "google.golang.org/protobuf/types/known/timestamppb" 24 25 "go.chromium.org/luci/gae/filter/featureBreaker" 26 "go.chromium.org/luci/gae/service/datastore" 27 28 "go.chromium.org/luci/common/clock" 29 "go.chromium.org/luci/common/clock/testclock" 30 "go.chromium.org/luci/scheduler/appengine/engine/policy" 31 "go.chromium.org/luci/scheduler/appengine/internal" 32 "go.chromium.org/luci/scheduler/appengine/messages" 33 "go.chromium.org/luci/scheduler/appengine/task" 34 35 . "github.com/smartystreets/goconvey/convey" 36 37 . "go.chromium.org/luci/common/testing/assertions" 38 ) 39 40 func TestTriageOp(t *testing.T) { 41 t.Parallel() 42 43 Convey("with fake env", t, func() { 44 c := newTestContext(epoch) 45 tb := triageTestBed{maxAllowedTriggers: 1000} 46 47 Convey("noop triage", func() { 48 before := &Job{ 49 JobID: "job", 50 Enabled: true, 51 ActiveInvocations: []int64{1, 2, 3}, 52 } 53 after, err := tb.runTestTriage(c, before) 54 So(err, ShouldBeNil) 55 56 // Only LastTriage timestamp is bumped. 57 expected := *before 58 expected.LastTriage = epoch 59 So(after, ShouldResemble, &expected) 60 61 // Have some triage log. 62 job := JobTriageLog{JobID: "job"} 63 So(datastore.Get(c, &job), ShouldBeNil) 64 So(job, ShouldResemble, JobTriageLog{ 65 JobID: "job", 66 LastTriage: epoch, 67 DebugLog: strings.Join([]string{ 68 "[000 ms] Starting", 69 "[000 ms] Pending triggers set: 0 items, 0 garbage", 70 "[000 ms] Recently finished set: 0 items, 0 garbage", 71 "[000 ms] The preparation is finished", 72 "[000 ms] Started the transaction", 73 "[000 ms] Number of active invocations: 3", 74 "[000 ms] Number of recently finished: 0", 75 "[000 ms] Triggers available in this txn: 0", 76 "[000 ms] Invoking the triggering policy function", 77 "[000 ms] The policy requested 0 new invocations", 78 "[000 ms] Removing consumed dsset items", 79 "[000 ms] Landing the transaction", 80 "[000 ms] Done", 81 "", 82 }, "\n"), 83 }) 84 }) 85 86 Convey("pops finished invocations", func() { 87 before := &Job{ 88 JobID: "job", 89 Enabled: true, 90 ActiveInvocations: []int64{1, 2, 3, 4, 5}, 91 } 92 recentlyFinishedSet(c, before.JobID).Add(c, []int64{1, 2, 4}) 93 94 // We record the triage time inside FinishedInvocation.Finished, not when 95 // the invocations was actually finished. Usually it shouldn't matter, 96 // since the difference is only a couple of seconds (due to TQ delays). 97 clock.Get(c).(testclock.TestClock).Add(15 * time.Second) 98 expectedFinishedTS := epoch.Add(15 * time.Second) 99 100 after, err := tb.runTestTriage(c, before) 101 So(err, ShouldBeNil) 102 So(after.ActiveInvocations, ShouldResemble, []int64{3, 5}) 103 104 // FinishedInvocationsRaw has the finished invocations. 105 finishedRaw := after.FinishedInvocationsRaw 106 finished, err := unmarshalFinishedInvs(finishedRaw) 107 So(err, ShouldBeNil) 108 So(finished, ShouldResemble, []*internal.FinishedInvocation{ 109 {InvocationId: 1, Finished: timestamppb.New(expectedFinishedTS)}, 110 {InvocationId: 2, Finished: timestamppb.New(expectedFinishedTS)}, 111 {InvocationId: 4, Finished: timestamppb.New(expectedFinishedTS)}, 112 }) 113 114 // Some time later, they are still there. 115 clock.Get(c).(testclock.TestClock).Add(FinishedInvocationsHorizon - time.Second) 116 after, err = tb.runTestTriage(c, after) 117 So(err, ShouldBeNil) 118 So(after.FinishedInvocationsRaw, ShouldResemble, finishedRaw) 119 120 // But eventually the get kicked out. 121 clock.Get(c).(testclock.TestClock).Add(2 * time.Second) 122 after, err = tb.runTestTriage(c, after) 123 So(err, ShouldBeNil) 124 So(after.FinishedInvocationsRaw, ShouldBeNil) 125 }) 126 127 Convey("processes triggers", func() { 128 triggers := []*internal.Trigger{ 129 { 130 Id: "t0", 131 Created: timestamppb.New(epoch.Add(20 * time.Second)), 132 }, 133 { 134 Id: "t1", 135 Created: timestamppb.New(epoch.Add(20 * time.Second)), 136 }, 137 { 138 Id: "t2", 139 Created: timestamppb.New(epoch.Add(10 * time.Second)), 140 }, 141 { 142 Id: "a_t3", // to make its ID be before t0 and t1 143 Created: timestamppb.New(epoch.Add(20 * time.Second)), 144 }, 145 } 146 147 before := &Job{ 148 JobID: "job", 149 Enabled: true, 150 } 151 pendingTriggersSet(c, before.JobID).Add(c, triggers) 152 153 // Cycle 1. Pops triggers, converts them into a bunch of invocations. 154 // Triggers are in sorted order! 155 after, err := tb.runTestTriage(c, before) 156 So(err, ShouldBeNil) 157 So(after.ActiveInvocations, ShouldResemble, []int64{1, 2, 3, 4}) 158 So(tb.requests, ShouldHaveLength, 4) 159 So(tb.requests[0].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[2]}) 160 So(tb.requests[1].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[3]}) 161 So(tb.requests[2].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[0]}) 162 So(tb.requests[3].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[1]}) 163 tb.requests = nil 164 165 // Cycle 2. Nothing new. 166 after, err = tb.runTestTriage(c, after) 167 So(err, ShouldBeNil) 168 So(after.ActiveInvocations, ShouldResemble, []int64{1, 2, 3, 4}) 169 So(tb.requests, ShouldBeNil) 170 }) 171 172 Convey("ignores triggers if paused", func() { 173 triggers := []*internal.Trigger{ 174 { 175 Id: "t0", 176 Created: timestamppb.New(epoch.Add(20 * time.Second)), 177 }, 178 { 179 Id: "t1", 180 Created: timestamppb.New(epoch.Add(20 * time.Second)), 181 }, 182 } 183 184 pendingTriggersSet(c, "job").Add(c, triggers) 185 186 // Just pops and discards triggers without starting anything. 187 after, err := tb.runTestTriage(c, &Job{ 188 JobID: "job", 189 Enabled: true, 190 Paused: true, 191 }) 192 So(err, ShouldBeNil) 193 So(after.ActiveInvocations, ShouldHaveLength, 0) 194 So(tb.requests, ShouldHaveLength, 0) 195 196 // No triggers there anymore. 197 listing, err := pendingTriggersSet(c, "job").List(c) 198 So(err, ShouldBeNil) 199 So(listing.Items, ShouldHaveLength, 0) 200 }) 201 202 Convey("drops excessive triggers", func() { 203 tb.maxAllowedTriggers = 1 204 205 triggers := []*internal.Trigger{ 206 { 207 Id: "most-recent", 208 Created: timestamppb.New(epoch.Add(3 * time.Second)), 209 }, 210 { 211 Id: "dropped-1", 212 Created: timestamppb.New(epoch.Add(2 * time.Second)), 213 }, 214 { 215 Id: "dropped-2", 216 Created: timestamppb.New(epoch.Add(1 * time.Second)), 217 }, 218 } 219 220 pendingTriggersSet(c, "job").Add(c, triggers) 221 222 // Only the most recent trigger will be kept alive and converted into 223 // an invocation. 224 after, err := tb.runTestTriage(c, &Job{ 225 JobID: "job", 226 Enabled: true, 227 }) 228 So(err, ShouldBeNil) 229 So(after.ActiveInvocations, ShouldHaveLength, 1) 230 So(tb.requests, ShouldHaveLength, 1) 231 So(tb.requests[0].IncomingTriggers, ShouldResembleProto, []*internal.Trigger{triggers[0]}) 232 233 // No triggers there anymore. 234 listing, err := pendingTriggersSet(c, "job").List(c) 235 So(err, ShouldBeNil) 236 So(listing.Items, ShouldHaveLength, 0) 237 }) 238 239 Convey("doesn't touch ds sets if txn fails to land", func() { 240 triggers := []*internal.Trigger{ 241 { 242 Id: "t0", 243 Created: timestamppb.New(epoch.Add(20 * time.Second)), 244 }, 245 { 246 Id: "t1", 247 Created: timestamppb.New(epoch.Add(20 * time.Second)), 248 }, 249 } 250 pendingTriggersSet(c, "job").Add(c, triggers) 251 252 brokenC, fb := featureBreaker.FilterRDS(c, nil) 253 fb.BreakFeatures(nil, "CommitTransaction") 254 255 after, err := tb.runTestTriage(brokenC, &Job{ 256 JobID: "job", 257 Enabled: true, 258 }) 259 So(err, ShouldNotBeNil) 260 So(after.ActiveInvocations, ShouldHaveLength, 0) 261 262 // Triggers are still there. 263 listing, err := pendingTriggersSet(c, "job").List(c) 264 So(err, ShouldBeNil) 265 So(listing.Items, ShouldHaveLength, 2) 266 }) 267 268 Convey("discards triggers", func() { 269 tb.policyDiscardsTriggers = true 270 271 triggers := []*internal.Trigger{ 272 { 273 Id: "t0", 274 Created: timestamppb.New(epoch.Add(20 * time.Second)), 275 }, 276 { 277 Id: "t1", 278 Created: timestamppb.New(epoch.Add(20 * time.Second)), 279 }, 280 } 281 282 before := &Job{ 283 JobID: "job", 284 Schedule: "with 10m interval", // This needs to be set, since the engine will poke cron. 285 Enabled: true, 286 } 287 pendingTriggersSet(c, before.JobID).Add(c, triggers) 288 289 // Cycle 1. Pops triggers and discards them. 290 after, err := tb.runTestTriage(c, before) 291 So(err, ShouldBeNil) 292 So(after.ActiveInvocations, ShouldHaveLength, 0) 293 So(tb.requests, ShouldHaveLength, 0) 294 295 // Cycle 2. Nothing new. 296 after, err = tb.runTestTriage(c, after) 297 So(err, ShouldBeNil) 298 So(after.ActiveInvocations, ShouldHaveLength, 0) 299 So(tb.requests, ShouldHaveLength, 0) 300 }) 301 }) 302 } 303 304 type triageTestBed struct { 305 // Inputs. 306 maxAllowedTriggers int 307 policyDiscardsTriggers bool 308 309 // Outputs. 310 nextInvID int64 311 requests []task.Request 312 } 313 314 // runTestTriage runs the triage operation and refetches the job after it. 315 // 316 // On triage errors, both 'after' and 'err' are returned. 317 func (t *triageTestBed) runTestTriage(c context.Context, before *Job) (after *Job, err error) { 318 if err = datastore.Put(c, before); err != nil { 319 return nil, err 320 } 321 322 op := triageOp{ 323 jobID: before.JobID, 324 policyFactory: func(*messages.TriggeringPolicy) (policy.Func, error) { 325 return func(env policy.Environment, in policy.In) (out policy.Out) { 326 if t.policyDiscardsTriggers { 327 out.Discard = append(out.Discard, in.Triggers...) 328 } else { 329 for _, t := range in.Triggers { 330 out.Requests = append(out.Requests, task.Request{ 331 IncomingTriggers: []*internal.Trigger{t}, 332 }) 333 } 334 } 335 return 336 }, nil 337 }, 338 enqueueInvocations: func(c context.Context, job *Job, req []task.Request) error { 339 t.requests = append(t.requests, req...) 340 for range req { 341 t.nextInvID++ 342 job.ActiveInvocations = append(job.ActiveInvocations, t.nextInvID) 343 } 344 return nil 345 }, 346 maxAllowedTriggers: t.maxAllowedTriggers, 347 } 348 349 if err = op.prepare(c); err == nil { 350 err = runTxn(c, func(c context.Context) error { 351 job := Job{JobID: op.jobID} 352 if err := datastore.Get(c, &job); err != nil { 353 return err 354 } 355 if err := op.transaction(c, &job); err != nil { 356 return err 357 } 358 return datastore.Put(c, &job) 359 }) 360 } 361 op.finalize(c, err == nil) 362 363 after = &Job{JobID: before.JobID} 364 if getErr := datastore.Get(c, after); getErr != nil { 365 panic("must not happen") 366 } 367 return after, err 368 }