go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/reclustering/orchestrator/orchestrator_test.go

// Copyright 2022 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package orchestrator

import (
	"context"
	"sort"
	"strings"
	"testing"
	"time"

	"cloud.google.com/go/spanner"
	"google.golang.org/protobuf/types/known/timestamppb"

	"go.chromium.org/luci/common/clock/testclock"
	"go.chromium.org/luci/gae/impl/memory"
	"go.chromium.org/luci/server/span"
	"go.chromium.org/luci/server/tq"
	"go.chromium.org/luci/server/tq/tqtesting"

	"go.chromium.org/luci/analysis/internal/clustering/algorithms"
	"go.chromium.org/luci/analysis/internal/clustering/rules"
	"go.chromium.org/luci/analysis/internal/clustering/runs"
	"go.chromium.org/luci/analysis/internal/clustering/shards"
	"go.chromium.org/luci/analysis/internal/clustering/state"
	"go.chromium.org/luci/analysis/internal/config"
	"go.chromium.org/luci/analysis/internal/tasks/taskspb"
	"go.chromium.org/luci/analysis/internal/testutil"
	configpb "go.chromium.org/luci/analysis/proto/config"

	_ "go.chromium.org/luci/server/tq/txn/spanner"

	. "github.com/smartystreets/goconvey/convey"
	. "go.chromium.org/luci/common/testing/assertions"
)

func TestOrchestrator(t *testing.T) {
	Convey(`With Spanner Test Database`, t, func() {
		ctx := testutil.IntegrationTestContext(t)

		// Simulate the Orchestrator job running one second past the hour.
		startTime := testclock.TestRecentTimeUTC.Truncate(time.Hour).Add(time.Second)
		ctx, tc := testclock.UseTime(ctx, startTime)

		ctx = memory.Use(ctx) // For config cache.
		ctx, skdr := tq.TestingContext(ctx, nil)

		cfg := &configpb.Config{
			ReclusteringWorkers: 5,
		}
		config.SetTestConfig(ctx, cfg)

		testProjects := []string{"project-a", "project-b", "project-c"}

		testOrchestratorDoesNothing := func() {
			beforeTasks := tasks(skdr)
			beforeRuns := readRuns(ctx, testProjects)

			err := CronHandler(ctx)
			So(err, ShouldBeNil)

			afterTasks := tasks(skdr)
			afterRuns := readRuns(ctx, testProjects)
			So(afterTasks, ShouldResembleProto, beforeTasks)
			So(afterRuns, ShouldResemble, beforeRuns)
		}

		Convey("Without Projects", func() {
			testutil.MustApply(ctx,
				spanner.Delete("ClusteringState", spanner.AllKeys()))

			testOrchestratorDoesNothing()
		})
		Convey("With Projects", func() {
			// Orchestrator only looks at the projects in ClusteringState table.
			var projectEntries []*state.Entry
			for _, p := range testProjects {
				projectEntries = append(projectEntries, state.NewEntry(0).WithProject(p).Build())
			}
			_, err := state.CreateEntriesForTesting(ctx, projectEntries)
			So(err, ShouldBeNil)

			// Some projects have config.
			configVersionA := time.Date(2029, time.April, 1, 0, 0, 0, 1, time.UTC)
			configVersionB := time.Date(2029, time.May, 1, 0, 0, 0, 1, time.UTC)
			projectCfg := make(map[string]*configpb.ProjectConfig)
			projectCfg["project-a"] = &configpb.ProjectConfig{
				LastUpdated: timestamppb.New(configVersionA),
			}
			projectCfg["project-b"] = &configpb.ProjectConfig{
				LastUpdated: timestamppb.New(configVersionB),
			}
			config.SetTestProjectConfig(ctx, projectCfg)

			// Create chunks in project-b. After this, the row estimates
			// for the projects should be:
			// project-a: ~100
			// project-b: ~450
			// project-c: ~100
			var entries []*state.Entry
			for i := 1; i < 450; i++ {
				entries = append(entries, state.NewEntry(i).WithProject("project-b").Build())
			}
			_, err = state.CreateEntriesForTesting(ctx, entries)
			So(err, ShouldBeNil)

			rulesVersionB := time.Date(2020, time.January, 10, 9, 8, 7, 0, time.UTC)
			rule := rules.NewRule(1).WithProject("project-b").WithPredicateLastUpdateTime(rulesVersionB).Build()
			err = rules.SetForTesting(ctx, []*rules.Entry{rule})
			So(err, ShouldBeNil)

			expectedRunStartTime := tc.Now().Truncate(time.Minute)
			expectedRunEndTime := expectedRunStartTime.Add(time.Minute)
			expectedTasks := []*taskspb.ReclusterChunks{
				{
					Project:      "project-a",
					AttemptTime:  timestamppb.New(expectedRunEndTime),
					StartChunkId: "",
					EndChunkId:   state.EndOfTable,
					State: &taskspb.ReclusterChunkState{
						CurrentChunkId: "",
						NextReportDue:  timestamppb.New(expectedRunStartTime),
					},
					ShardNumber: 1,
				},
				{
					Project:      "project-b",
					AttemptTime:  timestamppb.New(expectedRunEndTime),
					StartChunkId: "",
					EndChunkId:   strings.Repeat("55", 15) + "54",
					State: &taskspb.ReclusterChunkState{
						CurrentChunkId: "",
						NextReportDue:  timestamppb.New(expectedRunStartTime),
					},
					ShardNumber: 2,
				},
				{
					Project:      "project-b",
					AttemptTime:  timestamppb.New(expectedRunEndTime),
					StartChunkId: strings.Repeat("55", 15) + "54",
					EndChunkId:   strings.Repeat("aa", 15) + "a9",
					State: &taskspb.ReclusterChunkState{
						CurrentChunkId: strings.Repeat("55", 15) + "54",
						NextReportDue:  timestamppb.New(expectedRunStartTime.Add(5 * time.Second / 3)),
					},
					ShardNumber: 3,
				},
				{
					Project:      "project-b",
					AttemptTime:  timestamppb.New(expectedRunEndTime),
					StartChunkId: strings.Repeat("aa", 15) + "a9",
					EndChunkId:   state.EndOfTable,
					State: &taskspb.ReclusterChunkState{
						CurrentChunkId: strings.Repeat("aa", 15) + "a9",
						NextReportDue:  timestamppb.New(expectedRunStartTime.Add((5 * time.Second * 2) / 3)),
					},
					ShardNumber: 4,
				},
				{
					Project:      "project-c",
					AttemptTime:  timestamppb.New(expectedRunEndTime),
					StartChunkId: "",
					EndChunkId:   state.EndOfTable,
					State: &taskspb.ReclusterChunkState{
						CurrentChunkId: "",
						NextReportDue:  timestamppb.New(expectedRunStartTime),
					},
					ShardNumber: 5,
				},
			}

			expectedShards := []shards.ReclusteringShard{
				{
					ShardNumber:      1,
					AttemptTimestamp: expectedRunEndTime,
					Project:          "project-a",
					Progress:         spanner.NullInt64{},
				},
				{
					ShardNumber:      2,
					AttemptTimestamp: expectedRunEndTime,
					Project:          "project-b",
					Progress:         spanner.NullInt64{},
				},
				{
					ShardNumber:      3,
					AttemptTimestamp: expectedRunEndTime,
					Project:          "project-b",
					Progress:         spanner.NullInt64{},
				},
				{
					ShardNumber:      4,
					AttemptTimestamp: expectedRunEndTime,
					Project:          "project-b",
					Progress:         spanner.NullInt64{},
				},
				{
					ShardNumber:      5,
					AttemptTimestamp: expectedRunEndTime,
					Project:          "project-c",
					Progress:         spanner.NullInt64{},
				},
			}

			expectedRunA := &runs.ReclusteringRun{
				Project:           "project-a",
				AttemptTimestamp:  expectedRunEndTime,
				AlgorithmsVersion: algorithms.AlgorithmsVersion,
				ConfigVersion:     configVersionA,
				RulesVersion:      rules.StartingEpoch,
				ShardCount:        1,
				ShardsReported:    0,
				Progress:          0,
			}
			expectedRunB := &runs.ReclusteringRun{
				Project:           "project-b",
				AttemptTimestamp:  expectedRunEndTime,
				AlgorithmsVersion: algorithms.AlgorithmsVersion,
				ConfigVersion:     configVersionB,
				RulesVersion:      rulesVersionB,
				ShardCount:        3,
				ShardsReported:    0,
				Progress:          0,
			}
			expectedRunC := &runs.ReclusteringRun{
				Project:           "project-c",
				AttemptTimestamp:  expectedRunEndTime,
				AlgorithmsVersion: algorithms.AlgorithmsVersion,
				ConfigVersion:     config.StartingEpoch,
				RulesVersion:      rules.StartingEpoch,
				ShardCount:        1,
				ShardsReported:    0,
				Progress:          0,
			}
			expectedRuns := make(map[string]*runs.ReclusteringRun)
			expectedRuns["project-a"] = expectedRunA
			expectedRuns["project-b"] = expectedRunB
			expectedRuns["project-c"] = expectedRunC

			// updateExpectedTasks sets the Algorithms Version,
			// Rules Version and Config Version of expected tasks
			// to match those of the expected runs.
			updateExpectedTasks := func() {
				for _, t := range expectedTasks {
					run := expectedRuns[t.Project]
					t.AlgorithmsVersion = run.AlgorithmsVersion
					t.RulesVersion = timestamppb.New(run.RulesVersion)
					t.ConfigVersion = timestamppb.New(run.ConfigVersion)
				}
			}
			updateExpectedTasks()

			Convey("Disabled orchestrator does nothing", func() {
				Convey("Workers is zero", func() {
					cfg.ReclusteringWorkers = 0
					config.SetTestConfig(ctx, cfg)

					testOrchestratorDoesNothing()
				})
			})
			Convey("Schedules successfully without existing runs", func() {
				err := CronHandler(ctx)
				So(err, ShouldBeNil)

				actualTasks := tasks(skdr)
				So(actualTasks, ShouldResembleProto, expectedTasks)

				actualRuns := readRuns(ctx, testProjects)
				So(actualRuns, ShouldResemble, expectedRuns)

				actualShards, err := shards.ReadAll(span.Single(ctx))
				So(err, ShouldBeNil)
				So(actualShards, ShouldResemble, expectedShards)
			})
			Convey("Schedules successfully with a previous run", func() {
				previousRunB := &runs.ReclusteringRun{
					Project:           "project-b",
					AttemptTimestamp:  expectedRunEndTime.Add(-1 * time.Minute),
					AlgorithmsVersion: 1,
					ConfigVersion:     configVersionB.Add(-1 * time.Hour),
					RulesVersion:      rulesVersionB.Add(-1 * time.Hour),
					ShardCount:        10,
				}
				var previousShards []shards.ReclusteringShard
				for i := 0; i < 10; i++ {
					previousShards = append(previousShards, shards.ReclusteringShard{
						ShardNumber:      int64(50 + i),
						AttemptTimestamp: expectedRunEndTime.Add(-1 * time.Minute),
						Project:          "project-b",
						Progress:         spanner.NullInt64{Valid: true, Int64: 1000},
					})
				}

				expectedProgress := 10 * 1000
				expectedShardsReported := 10
				test := func() {
					err = CronHandler(ctx)
					So(err, ShouldBeNil)

					// Verify that the previous run had its progress set correctly.
					updatedPreviousRun, err := runs.Read(span.Single(ctx), previousRunB.Project, previousRunB.AttemptTimestamp)
					So(err, ShouldBeNil)
					So(updatedPreviousRun.Progress, ShouldEqual, expectedProgress)
					So(updatedPreviousRun.ShardsReported, ShouldEqual, expectedShardsReported)

					// Verify that correct shards were created and that shards
					// from previous runs were deleted.
					actualShards, err := shards.ReadAll(span.Single(ctx))
					So(err, ShouldBeNil)
					So(actualShards, ShouldResemble, expectedShards)

					actualTasks := tasks(skdr)
					So(actualTasks, ShouldResembleProto, expectedTasks)

					actualRuns := readRuns(ctx, testProjects)
					So(actualRuns, ShouldResemble, expectedRuns)
				}

				Convey("existing complete run", func() {
					err := runs.SetRunsForTesting(ctx, []*runs.ReclusteringRun{previousRunB})
					So(err, ShouldBeNil)

					err = shards.SetShardsForTesting(ctx, previousShards)
					So(err, ShouldBeNil)

					// A run scheduled after an existing complete run should
					// use the latest algorithms, config and rules available. So
					// our expectations are unchanged.
					test()
				})
				Convey("existing incomplete run", func() {
					for i := range previousShards {
						previousShards[i].Progress = spanner.NullInt64{Valid: true, Int64: 500}
					}
					expectedProgress = 10 * 500
					expectedShardsReported = 10

					err := runs.SetRunsForTesting(ctx, []*runs.ReclusteringRun{previousRunB})
					So(err, ShouldBeNil)

					err = shards.SetShardsForTesting(ctx, previousShards)
					So(err, ShouldBeNil)

					sds, err := shards.ReadAll(span.Single(ctx))
					So(err, ShouldBeNil)
					So(sds, ShouldResemble, previousShards)

					// Expect the same algorithms and rules version to be used as
					// the previous run, to ensure forward progress (if new rules
					// are being constantly created, we don't want to be
					// reclustering only the beginning of the workers' keyspaces).
					expectedRunB.AlgorithmsVersion = previousRunB.AlgorithmsVersion
					expectedRunB.ConfigVersion = previousRunB.ConfigVersion
					expectedRunB.RulesVersion = previousRunB.RulesVersion
					updateExpectedTasks()
					test()
				})
				Convey("existing unreported run", func() {
					for i := range previousShards {
						// Assume the shards did not report progress at all.
						previousShards[i].Progress = spanner.NullInt64{}
					}
					expectedProgress = 0
					expectedShardsReported = 0

					err := runs.SetRunsForTesting(ctx, []*runs.ReclusteringRun{previousRunB})
					So(err, ShouldBeNil)

					err = shards.SetShardsForTesting(ctx, previousShards)
					So(err, ShouldBeNil)

					// Expect the same algorithms and rules version to be used as
					// the previous run, to ensure forward progress (if new rules
					// are being constantly created, we don't want to be
					// reclustering only the beginning of the workers' keyspaces).
					expectedRunB.AlgorithmsVersion = previousRunB.AlgorithmsVersion
					expectedRunB.ConfigVersion = previousRunB.ConfigVersion
					expectedRunB.RulesVersion = previousRunB.RulesVersion
					updateExpectedTasks()
					test()
				})
				Convey("existing complete run with later algorithms version", func() {
					previousRunB.AlgorithmsVersion = algorithms.AlgorithmsVersion + 5

					err := runs.SetRunsForTesting(ctx, []*runs.ReclusteringRun{previousRunB})
					So(err, ShouldBeNil)

					err = shards.SetShardsForTesting(ctx, previousShards)
					So(err, ShouldBeNil)

					// If new algorithms are being rolled out, some GAE instances
					// may be running old code. This includes the instance that
					// runs the orchestrator.
					// To simplify reasoning about re-clustering runs, and ensure
					// correctness of re-clustering progress logic, we require
					// the algorithms version of subsequent runs to always be
					// non-decreasing.
					expectedRunB.AlgorithmsVersion = previousRunB.AlgorithmsVersion
					updateExpectedTasks()
					test()
				})
				Convey("existing complete run with later config version", func() {
					previousRunB.ConfigVersion = configVersionB.Add(time.Hour)

					err := runs.SetRunsForTesting(ctx, []*runs.ReclusteringRun{previousRunB})
					So(err, ShouldBeNil)

					err = shards.SetShardsForTesting(ctx, previousShards)
					So(err, ShouldBeNil)

					// If new config is being rolled out, some GAE instances
					// may still have old config cached. This includes the instance
					// that runs the orchestrator.
					// To simplify reasoning about re-clustering runs, and ensure
					// correctness of re-clustering progress logic, we require
					// the config version of subsequent runs to always be
					// non-decreasing.
					expectedRunB.ConfigVersion = previousRunB.ConfigVersion
					updateExpectedTasks()
					test()
				})
			})
		})
	})
}

// tasks returns the reclustering tasks currently enqueued on the test
// task queue scheduler, sorted by shard number.
func tasks(s *tqtesting.Scheduler) []*taskspb.ReclusterChunks {
	var tasks []*taskspb.ReclusterChunks
	for _, pl := range s.Tasks().Payloads() {
		task := pl.(*taskspb.ReclusterChunks)
		tasks = append(tasks, task)
	}
	sort.Slice(tasks, func(i, j int) bool {
		return tasks[i].ShardNumber < tasks[j].ShardNumber
	})
	return tasks
}

// readRuns reads the most recent reclustering run for each of the given
// projects.
func readRuns(ctx context.Context, projects []string) map[string]*runs.ReclusteringRun {
	txn, cancel := span.ReadOnlyTransaction(ctx)
	defer cancel()

	result := make(map[string]*runs.ReclusteringRun)
	for _, project := range projects {
		run, err := runs.ReadLastUpTo(txn, project, runs.MaxAttemptTimestamp)
		So(err, ShouldBeNil)
		result[project] = run
	}
	return result
}