go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/reclustering/worker_test.go

// Copyright 2022 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package reclustering

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"sort"
	"strings"
	"testing"
	"time"

	"cloud.google.com/go/spanner"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/durationpb"
	"google.golang.org/protobuf/types/known/timestamppb"

	"go.chromium.org/luci/common/clock/testclock"
	"go.chromium.org/luci/gae/impl/memory"
	"go.chromium.org/luci/server/caching"
	"go.chromium.org/luci/server/span"

	"go.chromium.org/luci/analysis/internal/analysis"
	"go.chromium.org/luci/analysis/internal/analysis/clusteredfailures"
	"go.chromium.org/luci/analysis/internal/clustering"
	"go.chromium.org/luci/analysis/internal/clustering/algorithms"
	"go.chromium.org/luci/analysis/internal/clustering/algorithms/failurereason"
	"go.chromium.org/luci/analysis/internal/clustering/algorithms/rulesalgorithm"
	"go.chromium.org/luci/analysis/internal/clustering/algorithms/testname"
	"go.chromium.org/luci/analysis/internal/clustering/chunkstore"
	cpb "go.chromium.org/luci/analysis/internal/clustering/proto"
	"go.chromium.org/luci/analysis/internal/clustering/rules"
	"go.chromium.org/luci/analysis/internal/clustering/rules/cache"
	"go.chromium.org/luci/analysis/internal/clustering/shards"
	"go.chromium.org/luci/analysis/internal/clustering/state"
	"go.chromium.org/luci/analysis/internal/config"
	"go.chromium.org/luci/analysis/internal/config/compiledcfg"
	spanutil "go.chromium.org/luci/analysis/internal/span"
	"go.chromium.org/luci/analysis/internal/tasks/taskspb"
	"go.chromium.org/luci/analysis/internal/testutil"
	"go.chromium.org/luci/analysis/pbutil"
	bqpb "go.chromium.org/luci/analysis/proto/bq"
	configpb "go.chromium.org/luci/analysis/proto/config"
	pb "go.chromium.org/luci/analysis/proto/v1"

	. "github.com/smartystreets/goconvey/convey"
	. "go.chromium.org/luci/common/testing/assertions"
)

const testProject = "testproject"

// scenario represents a LUCI Analysis system state used for testing.
type scenario struct {
	// clusteringState stores the test result-cluster inclusions
	// for each test result in each chunk, and related metadata.
	clusteringState []*state.Entry
	// netBQExports are the test result-cluster insertions recorded
	// in BigQuery, net of any deletions/updates.
	netBQExports []*bqpb.ClusteredFailureRow
	// config is the clustering configuration.
	config *configpb.Clustering
	// configVersion is the last updated time of the configuration.
	configVersion time.Time
	// rulesVersion is the version of the failure association rules.
	rulesVersion rules.Version
	// rules are the failure association rules.
	rules []*rules.Entry
	// testResults are the actual test failures ingested by LUCI Analysis,
	// organised in chunks by object ID.
	testResultsByObjectID map[string]*cpb.Chunk
	// noProjectConfig, if true, indicates that no project configuration
	// should be set up for the test.
	noProjectConfig bool
}

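// TestReclustering exercises the re-clustering worker end-to-end, using a
// fake chunk store, a fake BigQuery exporter and a Spanner test database.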
func TestReclustering(t *testing.T) {
	Convey(`With Worker`, t, func() {
		ctx := testutil.IntegrationTestContext(t)
		ctx, tc := testclock.UseTime(ctx, testclock.TestRecentTimeUTC)
		ctx = caching.WithEmptyProcessCache(ctx) // For rules cache.
		ctx = memory.Use(ctx)                    // For project config.

		chunkStore := chunkstore.NewFakeClient()
		clusteredFailures := clusteredfailures.NewFakeClient()
		analysis := analysis.NewClusteringHandler(clusteredFailures)

		worker := NewWorker(chunkStore, analysis)

		runEndTime := tc.Now().Add(time.Minute * 10)
		shard := shards.ReclusteringShard{
			ShardNumber:      5,
			AttemptTimestamp: runEndTime,
			Project:          testProject,
		}
		task := &taskspb.ReclusterChunks{
			ShardNumber:  shard.ShardNumber,
			Project:      testProject,
			AttemptTime:  timestamppb.New(runEndTime),
			StartChunkId: "",
			EndChunkId:   state.EndOfTable,
			State: &taskspb.ReclusterChunkState{
				CurrentChunkId: "",
				NextReportDue:  timestamppb.New(tc.Now()),
			},
			AlgorithmsVersion: algorithms.AlgorithmsVersion,
		}

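		// setupScenario writes the given scenario (shard, rules, project
		// configuration, chunk test results and clustering state) to the
		// test environment.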
		setupScenario := func(s *scenario) {
			task.RulesVersion = timestamppb.New(s.rulesVersion.Predicates)
			task.ConfigVersion = timestamppb.New(s.configVersion)

			// Create a shard entry corresponding to the task.
			So(shards.SetShardsForTesting(ctx, []shards.ReclusteringShard{shard}), ShouldBeNil)

			// Set stored failure association rules.
			So(rules.SetForTesting(ctx, s.rules), ShouldBeNil)

			cfg := map[string]*configpb.ProjectConfig{
				testProject: {
					Clustering:  s.config,
					LastUpdated: timestamppb.New(s.configVersion),
				},
			}
			if s.noProjectConfig {
				cfg = map[string]*configpb.ProjectConfig{}
			}
			So(config.SetTestProjectConfig(ctx, cfg), ShouldBeNil)

			// Set stored test result chunks.
			for objectID, chunk := range s.testResultsByObjectID {
				chunkStore.Contents[chunkstore.FileName(testProject, objectID)] = chunk
			}

			// Set stored clustering state.
			commitTime, err := state.CreateEntriesForTesting(ctx, s.clusteringState)
			So(err, ShouldBeNil)
			for _, e := range s.clusteringState {
				e.LastUpdated = commitTime.In(time.UTC)
			}
		}

		Convey(`Re-clustering`, func() {
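			// testReclustering runs the worker over the initial scenario and
			// verifies the re-clustered state, net BigQuery exports and
			// reported progress match the expected scenario.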
			testReclustering := func(initial *scenario, expected *scenario) {
				setupScenario(initial)

				// Run the task.
				continuation, err := worker.Do(ctx, task, TargetTaskDuration)
				So(err, ShouldBeNil)
				So(continuation, ShouldBeNil)

				// The final clustering state should equal the expected state.
				actualState, err := state.ReadAllForTesting(ctx, testProject)
				So(err, ShouldBeNil)
				for _, as := range actualState {
					// Clear last updated time to compare actual vs expected
					// state based on row contents, not when the row was updated.
					as.LastUpdated = time.Time{}
				}
				So(actualState, ShouldResemble, expected.clusteringState)

				// BigQuery exports should correctly reflect the new
				// test result-cluster inclusions.
				exports := clusteredFailures.Insertions
				sortBQExport(exports)
				netExports := flattenBigQueryExports(append(initial.netBQExports, exports...))
				So(netExports, ShouldResembleProto, expected.netBQExports)

				// Run is reported as complete.
				actualShards, err := shards.ReadAll(span.Single(ctx))
				So(err, ShouldBeNil)
				So(actualShards, ShouldHaveLength, 1)
				So(actualShards[0].Progress, ShouldResemble, spanner.NullInt64{Valid: true, Int64: 1000})
			}

			Convey("Already up-to-date", func() {
				expected := newScenario().build()

				// Start with up-to-date clustering.
				s := newScenario().build()

				testReclustering(s, expected)

				// Further bound the expected behaviour: not only should
				// there be zero net changes to the BigQuery export, but no
				// changes should be written to BigQuery at all.
				So(clusteredFailures.Insertions, ShouldBeEmpty)
			})
			Convey("From old algorithms", func() {
				expected := newScenario().build()

				// Start with an out-of-date clustering.
				s := newScenario().withOldAlgorithms(true).build()

				testReclustering(s, expected)
			})
			Convey("From old configuration", func() {
				expected := newScenario().build()

				// Start with clustering based on old configuration.
				s := newScenario().withOldConfig(true).build()
				s.config = expected.config
				s.configVersion = expected.configVersion

				testReclustering(s, expected)
			})
			Convey("From old rules", func() {
				expected := newScenario().build()

				// Start with clustering based on old rules.
				s := newScenario().withOldRules(true).build()
				s.rules = expected.rules
				s.rulesVersion = expected.rulesVersion

				testReclustering(s, expected)
			})
			Convey("From old rules with no project config", func() {
				expected := newScenario().withNoConfig(true).build()

				// Start with clustering based on old rules.
				s := newScenario().withNoConfig(true).withOldRules(true).build()
				s.rules = expected.rules
				s.rulesVersion = expected.rulesVersion

				testReclustering(s, expected)
			})
		})
		Convey(`Worker respects end time`, func() {
			expected := newScenario().build()

			// Start with an out-of-date clustering.
			s := newScenario().withOldAlgorithms(true).build()
			s.rules = expected.rules
			s.rulesVersion = expected.rulesVersion
			setupScenario(s)

			// Start the worker after the run end time.
			tc.Add(11 * time.Minute)
			So(tc.Now(), ShouldHappenAfter, task.AttemptTime.AsTime())

			// Run the task.
			continuation, err := worker.Do(ctx, task, TargetTaskDuration)
			So(err, ShouldBeNil)
			So(continuation, ShouldBeNil)

			// Clustering state should be the same as the initial state.
			actualState, err := state.ReadAllForTesting(ctx, testProject)
			So(err, ShouldBeNil)
			So(actualState, ShouldResemble, s.clusteringState)

			// No changes written to BigQuery.
			So(clusteredFailures.Insertions, ShouldBeEmpty)

			// No progress is reported.
			actualShards, err := shards.ReadAll(span.Single(ctx))
			So(err, ShouldBeNil)
			So(actualShards, ShouldHaveLength, 1)
			So(actualShards[0].Progress, ShouldResemble, spanner.NullInt64{Valid: false, Int64: 0})
		})
		Convey(`Handles update/update races`, func() {
			finalState := newScenario().build()

			// Start with an out-of-date clustering.
			s := newScenario().withOldAlgorithms(true).build()
			s.rules = finalState.rules
			s.rulesVersion = finalState.rulesVersion
			setupScenario(s)

			// Make reading a chunk's test results trigger an update of its
			// clustering state in Spanner, to simulate an update/update race.
			chunkIDByObjectID := make(map[string]string)
			for _, entry := range s.clusteringState {
				chunkIDByObjectID[entry.ObjectID] = entry.ChunkID
			}
			chunkStore.GetCallack = func(objectID string) {
				chunkID, ok := chunkIDByObjectID[objectID]

				// Only simulate the update/update race once per chunk.
				if !ok {
					return
				}
				delete(chunkIDByObjectID, objectID)

				_, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error {
					span.BufferWrite(ctx, spanutil.UpdateMap("ClusteringState", map[string]any{
						"Project": testProject,
						"ChunkID": chunkID,
						// Simulate a race with another update that
						// re-clustered the chunk to an algorithms version
						// later than the one we know about.
						"AlgorithmsVersion": algorithms.AlgorithmsVersion + 1,
						"LastUpdated":       spanner.CommitTimestamp,
					}))
					return nil
				})
				So(err, ShouldBeNil)
			}

			// Run the worker with time advancing at 100 times speed,
			// as the transaction retry logic sets timers which must be
			// triggered.
			runWithTimeAdvancing(tc, func() {
				continuation, err := worker.Do(ctx, task, TargetTaskDuration)
				So(err, ShouldBeNil)
				So(continuation, ShouldBeNil)
			})

			// Because of the update races, none of the chunks should have
			// been re-clustered further.
			expected := newScenario().withOldAlgorithms(true).build()
			for _, es := range expected.clusteringState {
				es.Clustering.AlgorithmsVersion = algorithms.AlgorithmsVersion + 1
			}

			actualState, err := state.ReadAllForTesting(ctx, testProject)
			So(err, ShouldBeNil)
			for _, as := range actualState {
				as.LastUpdated = time.Time{}
			}
			So(actualState, ShouldResemble, expected.clusteringState)

			// No changes written to BigQuery.
			So(clusteredFailures.Insertions, ShouldBeEmpty)

			// Shard is reported as complete.
			actualShards, err := shards.ReadAll(span.Single(ctx))
			So(err, ShouldBeNil)
			So(actualShards, ShouldHaveLength, 1)
			So(actualShards[0].Progress, ShouldResemble, spanner.NullInt64{Valid: true, Int64: 1000})
		})
		Convey(`Worker running out of date algorithms`, func() {
			task.AlgorithmsVersion = algorithms.AlgorithmsVersion + 1
			task.ConfigVersion = timestamppb.New(config.StartingEpoch)
			task.RulesVersion = timestamppb.New(rules.StartingEpoch)

			continuation, err := worker.Do(ctx, task, TargetTaskDuration)
			So(err, ShouldErrLike, "running out-of-date algorithms version")
			So(continuation, ShouldBeNil)
		})
		Convey(`Continuation correctly scheduled`, func() {
			task.RulesVersion = timestamppb.New(rules.StartingEpoch)
			task.ConfigVersion = timestamppb.New(config.StartingEpoch)

			// Leave no time for the task to run.
			continuation, err := worker.Do(ctx, task, 0*time.Second)
			So(err, ShouldBeNil)

			// Continuation should be scheduled, matching the original task.
			So(continuation, ShouldResembleProto, task)
		})
	})
}

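// TestProgress verifies that calculateProgress measures a task's progress
// through its assigned chunk ID keyspace on a scale of 0 to 999.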
func TestProgress(t *testing.T) {
	Convey(`Task assigned entire keyspace`, t, func() {
		task := &taskspb.ReclusterChunks{
			StartChunkId: "",
			EndChunkId:   strings.Repeat("ff", 16),
		}

		progress, err := calculateProgress(task, strings.Repeat("00", 16))
		So(err, ShouldBeNil)
		So(progress, ShouldEqual, 0)

		progress, err = calculateProgress(task, "80"+strings.Repeat("00", 15))
		So(err, ShouldBeNil)
		So(progress, ShouldEqual, 500)

		progress, err = calculateProgress(task, strings.Repeat("ff", 16))
		So(err, ShouldBeNil)
		So(progress, ShouldEqual, 999)
	})
	Convey(`Task assigned partial keyspace`, t, func() {
		// Consistent with the second shard, if the keyspace is split into
		// three.
		task := &taskspb.ReclusterChunks{
			StartChunkId: strings.Repeat("55", 15) + "54",
			EndChunkId:   strings.Repeat("aa", 15) + "a9",
		}

		progress, err := calculateProgress(task, strings.Repeat("55", 16))
		So(err, ShouldBeNil)
		So(progress, ShouldEqual, 0)

		progress, err = calculateProgress(task, strings.Repeat("77", 16))
		So(err, ShouldBeNil)
		So(progress, ShouldEqual, 400)

		progress, err = calculateProgress(task, strings.Repeat("aa", 15)+"a9")
		So(err, ShouldBeNil)
		So(progress, ShouldEqual, 999)
	})
}

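// runWithTimeAdvancing runs cb while a background goroutine advances the
// test clock by 100 milliseconds of fake time for every millisecond of
// real time, so that timers set during cb (e.g. by transaction retry
// logic) fire without holding up the test.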
func runWithTimeAdvancing(tc testclock.TestClock, cb func()) {
	ticker := time.NewTicker(time.Millisecond)
	done := make(chan bool)
	go func() {
		for {
			select {
			case <-done:
				return
			case <-ticker.C:
				// Run with time advancing at 100 times speed, to
				// avoid holding up tests unnecessarily.
				tc.Add(time.Millisecond * 100)
			}
		}
	}()

	cb()

	ticker.Stop()
	done <- true
}

// flattenBigQueryExports returns the latest inclusion row for
// each test result-cluster, from a list of BigQuery exports.
// The returned rows do not have the last updated time set.
func flattenBigQueryExports(exports []*bqpb.ClusteredFailureRow) []*bqpb.ClusteredFailureRow {
	keyValue := make(map[string]*bqpb.ClusteredFailureRow)
	for _, row := range exports {
		key := bigQueryKey(row)
		existingRow, ok := keyValue[key]
		if ok && existingRow.LastUpdated.AsTime().After(row.LastUpdated.AsTime()) {
			continue
		}
		keyValue[key] = row
	}
	var result []*bqpb.ClusteredFailureRow
	for _, row := range keyValue {
		if row.IsIncluded {
			clonedRow := proto.Clone(row).(*bqpb.ClusteredFailureRow)
			clonedRow.LastUpdated = nil
			result = append(result, clonedRow)
		}
	}
	sortBQExport(result)
	return result
}

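// bigQueryKey returns a key that uniquely identifies the test
// result-cluster pairing a BigQuery export row refers to.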
func bigQueryKey(row *bqpb.ClusteredFailureRow) string {
	return fmt.Sprintf("%q/%q/%q/%q", row.ClusterAlgorithm, row.ClusterId, row.TestResultSystem, row.TestResultId)
}

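// testResultBuilder is used to build a test result for testing, together
// with its expected clustering and BigQuery export rows. The uniqifier is
// used to derive distinct values for most fields.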
type testResultBuilder struct {
	uniqifier     int
	failureReason *pb.FailureReason
	testName      string
}

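// newTestResult returns a testResultBuilder with a test name and failure
// reason derived from the given uniqifier.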
func newTestResult(uniqifier int) *testResultBuilder {
	return &testResultBuilder{
		uniqifier: uniqifier,
		testName:  fmt.Sprintf("ninja://test_name/%v", uniqifier),
		failureReason: &pb.FailureReason{
			PrimaryErrorMessage: fmt.Sprintf("Failure reason %v.", uniqifier),
		},
	}
}

func (b *testResultBuilder) withTestName(name string) *testResultBuilder {
	b.testName = name
	return b
}

func (b *testResultBuilder) withFailureReason(reason *pb.FailureReason) *testResultBuilder {
	b.failureReason = reason
	return b
}

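// buildFailure returns the failure proto that would be stored in the
// chunk store for this test result. The ChunkIndex field is left at -1
// and must be populated by the caller.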
func (b *testResultBuilder) buildFailure() *cpb.Failure {
	keyHash := sha256.Sum256([]byte("variantkey:value\n"))
	buildCritical := b.uniqifier%2 == 0
	return &cpb.Failure{
		TestResultId:  pbutil.TestResultIDFromResultDB(fmt.Sprintf("invocations/testrun-%v/tests/test-name-%v/results/%v", b.uniqifier, b.uniqifier, b.uniqifier)),
		PartitionTime: timestamppb.New(time.Date(2020, time.April, 1, 2, 3, 4, 0, time.UTC)),
		ChunkIndex:    -1, // To be populated by caller.
		Realm:         "testproject:realm",
		TestId:        b.testName,
		Variant:       &pb.Variant{Def: map[string]string{"variantkey": "value"}},
		VariantHash:   hex.EncodeToString(keyHash[:]),
		FailureReason: b.failureReason,
		BugTrackingComponent: &pb.BugTrackingComponent{
			System:    "monorail",
			Component: "Component>MyComponent",
		},
		StartTime: timestamppb.New(time.Date(2025, time.March, 2, 2, 2, 2, b.uniqifier, time.UTC)),
		Duration:  durationpb.New(time.Duration(b.uniqifier) * time.Second),
		Exonerations: []*cpb.TestExoneration{
			{
				Reason: pb.ExonerationReason(1 + (b.uniqifier % 3)),
			},
		},
		PresubmitRun: &cpb.PresubmitRun{
			PresubmitRunId: &pb.PresubmitRunId{
				System: "luci-cv",
				Id:     fmt.Sprintf("run-%v", b.uniqifier),
			},
			Owner:  fmt.Sprintf("owner-%v", b.uniqifier),
			Mode:   pb.PresubmitRunMode(1 + b.uniqifier%3),
			Status: pb.PresubmitRunStatus(3 - b.uniqifier%3),
		},
		BuildStatus:   pb.BuildStatus(1 + b.uniqifier%4),
		BuildCritical: &buildCritical,

		IngestedInvocationId:          fmt.Sprintf("invocation-%v", b.uniqifier),
		IngestedInvocationResultIndex: int64(b.uniqifier + 1),
		IngestedInvocationResultCount: int64(b.uniqifier*2 + 1),
		IsIngestedInvocationBlocked:   b.uniqifier%3 == 0,

		TestRunId:          fmt.Sprintf("test-run-%v", b.uniqifier),
		TestRunResultIndex: int64(b.uniqifier/2 + 1),
		TestRunResultCount: int64(b.uniqifier + 1),
		IsTestRunBlocked:   b.uniqifier%2 == 0,
	}
}

// buildBQExport returns the expected test result-cluster inclusion rows that
// would appear in BigQuery, if the test result was in the given clusters.
// Note that deletions are not returned; these are simply the 'net' rows that
// would be expected.
func (b *testResultBuilder) buildBQExport(clusterIDs []clustering.ClusterID) []*bqpb.ClusteredFailureRow {
	keyHash := sha256.Sum256([]byte("variantkey:value\n"))
	var inBugCluster bool
	for _, cID := range clusterIDs {
		if cID.IsBugCluster() {
			inBugCluster = true
		}
	}

	presubmitRunStatus := pb.PresubmitRunStatus(3 - b.uniqifier%3).String()
	if !strings.HasPrefix(presubmitRunStatus, "PRESUBMIT_RUN_STATUS_") {
		panic("PresubmitRunStatus does not have expected prefix: " + presubmitRunStatus)
	}
	presubmitRunStatus = strings.TrimPrefix(presubmitRunStatus, "PRESUBMIT_RUN_STATUS_")

	var results []*bqpb.ClusteredFailureRow
	for _, cID := range clusterIDs {
		result := &bqpb.ClusteredFailureRow{
			ClusterAlgorithm: cID.Algorithm,
			ClusterId:        cID.ID,
			TestResultSystem: "resultdb",
			TestResultId:     fmt.Sprintf("invocations/testrun-%v/tests/test-name-%v/results/%v", b.uniqifier, b.uniqifier, b.uniqifier),
			LastUpdated:      nil, // To be set by caller.
			Project:          testProject,

			PartitionTime:              timestamppb.New(time.Date(2020, time.April, 1, 2, 3, 4, 0, time.UTC)),
			IsIncluded:                 true,
			IsIncludedWithHighPriority: cID.IsBugCluster() || !inBugCluster,

			ChunkId:    "", // To be set by caller.
			ChunkIndex: 0,  // To be set by caller.

			Realm:  "testproject:realm",
			TestId: b.testName,
			Variant: []*pb.StringPair{
				{
					Key:   "variantkey",
					Value: "value",
				},
			},
			VariantHash:   hex.EncodeToString(keyHash[:]),
			FailureReason: b.failureReason,
			BugTrackingComponent: &pb.BugTrackingComponent{
				System:    "monorail",
				Component: "Component>MyComponent",
			},
			StartTime: timestamppb.New(time.Date(2025, time.March, 2, 2, 2, 2, b.uniqifier, time.UTC)),
			Duration:  float64(b.uniqifier),
			Exonerations: []*bqpb.ClusteredFailureRow_TestExoneration{
				{
					Reason: pb.ExonerationReason(1 + (b.uniqifier % 3)),
				},
			},
			PresubmitRunId: &pb.PresubmitRunId{
				System: "luci-cv",
				Id:     fmt.Sprintf("run-%v", b.uniqifier),
			},
			PresubmitRunOwner:  fmt.Sprintf("owner-%v", b.uniqifier),
			PresubmitRunMode:   pb.PresubmitRunMode(1 + b.uniqifier%3).String(),
			PresubmitRunStatus: presubmitRunStatus,
			BuildStatus:        strings.TrimPrefix(pb.BuildStatus(1+b.uniqifier%4).String(), "BUILD_STATUS_"),
			BuildCritical:      b.uniqifier%2 == 0,

			IngestedInvocationId:          fmt.Sprintf("invocation-%v", b.uniqifier),
			IngestedInvocationResultIndex: int64(b.uniqifier + 1),
			IngestedInvocationResultCount: int64(b.uniqifier*2 + 1),
			IsIngestedInvocationBlocked:   b.uniqifier%3 == 0,

			TestRunId:          fmt.Sprintf("test-run-%v", b.uniqifier),
			TestRunResultIndex: int64(b.uniqifier/2 + 1),
			TestRunResultCount: int64(b.uniqifier + 1),
			IsTestRunBlocked:   b.uniqifier%2 == 0,
		}
		results = append(results, result)
	}
	return results
}

// buildClusters returns the clusters that would be expected for this test
// result, if current clustering algorithms were used.
func (b *testResultBuilder) buildClusters(rules *cache.Ruleset, config *compiledcfg.ProjectConfig) []clustering.ClusterID {
	var clusters []clustering.ClusterID
	failure := &clustering.Failure{
		TestID: b.testName,
		Reason: b.failureReason,
	}
	testNameAlg := &testname.Algorithm{}
	clusters = append(clusters, clustering.ClusterID{
		Algorithm: testNameAlg.Name(),
		ID:        hex.EncodeToString(testNameAlg.Cluster(config, failure)),
	})
	if b.failureReason != nil && b.failureReason.PrimaryErrorMessage != "" {
		failureReasonAlg := &failurereason.Algorithm{}
		clusters = append(clusters, clustering.ClusterID{
			Algorithm: failureReasonAlg.Name(),
			ID:        hex.EncodeToString(failureReasonAlg.Cluster(config, failure)),
		})
	}
	vals := &clustering.Failure{
		TestID: b.testName,
		Reason: &pb.FailureReason{PrimaryErrorMessage: b.failureReason.GetPrimaryErrorMessage()},
	}
	for _, rule := range rules.ActiveRulesSorted {
		if rule.Expr.Evaluate(vals) {
			clusters = append(clusters, clustering.ClusterID{
				Algorithm: rulesalgorithm.AlgorithmName,
				ID:        rule.Rule.RuleID,
			})
		}
	}
	clustering.SortClusters(clusters)
	return clusters
}

// chunkBuilder is used to build a chunk with test results, clustering state
// and BigQuery exports, for testing.
type chunkBuilder struct {
	project       string
	chunkID       string
	objectID      string
	testResults   []*testResultBuilder
	ruleset       *cache.Ruleset
	config        *compiledcfg.ProjectConfig
	oldAlgorithms bool
}

// newChunk returns a new chunkBuilder for creating a new chunk. The
// uniqifier is used to generate the chunk and object IDs.
func newChunk(uniqifier int) *chunkBuilder {
	chunkID := sha256.Sum256([]byte(fmt.Sprintf("chunk-%v", uniqifier)))
	objectID := sha256.Sum256([]byte(fmt.Sprintf("object-%v", uniqifier)))
	config, err := compiledcfg.NewConfig(&configpb.ProjectConfig{
		LastUpdated: timestamppb.New(time.Date(2022, time.January, 1, 0, 0, 0, 0, time.UTC)),
	})
	if err != nil {
		// This should never occur, as the config should be valid.
		panic(err)
	}
	return &chunkBuilder{
		project:       "testproject",
		chunkID:       hex.EncodeToString(chunkID[:16]),
		objectID:      hex.EncodeToString(objectID[:16]),
		ruleset:       cache.NewRuleset("", nil, rules.StartingVersion, time.Time{}),
		config:        config,
		oldAlgorithms: false,
	}
}

func (b *chunkBuilder) withProject(project string) *chunkBuilder {
	b.project = project
	return b
}

func (b *chunkBuilder) withTestResults(tr ...*testResultBuilder) *chunkBuilder {
	b.testResults = tr
	return b
}

// withOldAlgorithms sets whether out-of-date algorithms should be used
// to produce the clustering, instead of the current algorithms.
func (b *chunkBuilder) withOldAlgorithms(old bool) *chunkBuilder {
	b.oldAlgorithms = old
	return b
}

// withRuleset sets the ruleset used to determine the current clustering
// (only used if withOldAlgorithms is not set).
func (b *chunkBuilder) withRuleset(ruleset *cache.Ruleset) *chunkBuilder {
	b.ruleset = ruleset
	return b
}

// withConfig sets the configuration used to determine the current
// clustering (only used if withOldAlgorithms is not set).
func (b *chunkBuilder) withConfig(config *compiledcfg.ProjectConfig) *chunkBuilder {
	b.config = config
	return b
}

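// buildTestResults returns the chunk of test result (failure) protos that
// would be stored in the chunk store for this chunk.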
func (b *chunkBuilder) buildTestResults() *cpb.Chunk {
	var failures []*cpb.Failure
	for i, tr := range b.testResults {
		failure := tr.buildFailure()
		failure.ChunkIndex = int64(i + 1)
		failures = append(failures, failure)
	}
	return &cpb.Chunk{
		Failures: failures,
	}
}

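// buildState returns the clustering state entry that would be stored in
// Spanner for this chunk, clustered with either old or current algorithms
// depending on the builder's configuration.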
func (b *chunkBuilder) buildState() *state.Entry {
	var crs clustering.ClusterResults
	if b.oldAlgorithms {
		algs := make(map[string]struct{})
		algs["testname-v1"] = struct{}{}
		algs["rules-v1"] = struct{}{}
		var clusters [][]clustering.ClusterID
		for range b.testResults {
			cs := []clustering.ClusterID{
				{
					Algorithm: "testname-v1",
					ID:        "01dc151e01dc151e01dc151e01dc151e",
				},
				{
					Algorithm: "rules-v1",
					ID:        "12341234123412341234123412341234",
				},
			}
			clustering.SortClusters(cs)
			clusters = append(clusters, cs)
		}
		crs = clustering.ClusterResults{
			AlgorithmsVersion: 1,
			ConfigVersion:     b.config.LastUpdated,
			RulesVersion:      b.ruleset.Version.Predicates,
			Algorithms:        algs,
			Clusters:          clusters,
		}
	} else {
		algs := make(map[string]struct{})
		algs[testname.AlgorithmName] = struct{}{}
		algs[failurereason.AlgorithmName] = struct{}{}
		algs[rulesalgorithm.AlgorithmName] = struct{}{}
		var clusters [][]clustering.ClusterID
		for _, tr := range b.testResults {
			clusters = append(clusters, tr.buildClusters(b.ruleset, b.config))
		}
		crs = clustering.ClusterResults{
			AlgorithmsVersion: algorithms.AlgorithmsVersion,
			ConfigVersion:     b.config.LastUpdated,
			RulesVersion:      b.ruleset.Version.Predicates,
			Algorithms:        algs,
			Clusters:          clusters,
		}
	}

	return &state.Entry{
		Project:       b.project,
		ChunkID:       b.chunkID,
		PartitionTime: time.Date(2020, time.April, 1, 2, 3, 4, 0, time.UTC),
		ObjectID:      b.objectID,
		Clustering:    crs,
	}
}

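// buildBQExport returns the BigQuery export rows expected for all test
// results in this chunk, given its current clustering state.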
func (b *chunkBuilder) buildBQExport() []*bqpb.ClusteredFailureRow {
	entry := b.buildState()
	var result []*bqpb.ClusteredFailureRow
	for i, tr := range b.testResults {
		cIDs := entry.Clustering.Clusters[i]
		rows := tr.buildBQExport(cIDs)
		for _, r := range rows {
			r.ChunkId = b.chunkID
			r.ChunkIndex = int64(i + 1)
		}
		result = append(result, rows...)
	}
	return result
}

// scenarioBuilder is used to generate LUCI Analysis system states used for
// testing. Each scenario represents a consistent state of the LUCI Analysis
// system, i.e.
//   - where the clustering state matches the configured rules, and
//   - the BigQuery exports match the clustering state, and the test results
//     in the chunk store.
type scenarioBuilder struct {
	project       string
	chunkCount    int
	oldAlgorithms bool
	oldRules      bool
	oldConfig     bool
	noConfig      bool
}

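// newScenario returns a scenarioBuilder which, by default, builds a
// scenario with two chunks clustered with current rules, configuration
// and algorithms.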
func newScenario() *scenarioBuilder {
	return &scenarioBuilder{
		project:    testProject,
		chunkCount: 2,
	}
}

func (b *scenarioBuilder) withOldAlgorithms(value bool) *scenarioBuilder {
	b.oldAlgorithms = value
	return b
}

func (b *scenarioBuilder) withOldRules(value bool) *scenarioBuilder {
	b.oldRules = value
	return b
}

func (b *scenarioBuilder) withOldConfig(value bool) *scenarioBuilder {
	b.oldConfig = value
	return b
}

func (b *scenarioBuilder) withNoConfig(value bool) *scenarioBuilder {
	b.noConfig = value
	return b
}

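// build constructs the scenario: the failure association rules, project
// configuration, chunk test results, clustering state and the expected
// net BigQuery exports.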
func (b *scenarioBuilder) build() *scenario {
	var rs []*rules.Entry
	var activeRules []*cache.CachedRule

	rulesVersion := rules.Version{
		Predicates: time.Date(2001, time.January, 1, 0, 0, 0, 1000, time.UTC),
		Total:      time.Date(2001, time.January, 1, 0, 0, 0, 2000, time.UTC),
	}
	ruleOne := rules.NewRule(0).WithProject(b.project).
		WithRuleDefinition(`test = "test_b"`).
		WithPredicateLastUpdateTime(rulesVersion.Predicates).
		WithLastUpdateTime(rulesVersion.Total).
		Build()
	rs = []*rules.Entry{ruleOne}
	if !b.oldRules {
		rulesVersion = rules.Version{
			Predicates: time.Date(2002, time.January, 1, 0, 0, 0, 1000, time.UTC),
			Total:      time.Date(2002, time.January, 1, 0, 0, 0, 2000, time.UTC),
		}
		ruleTwo := rules.NewRule(1).WithProject(b.project).
			WithRuleDefinition(`reason = "reason_b"`).
			WithPredicateLastUpdateTime(rulesVersion.Predicates).
			WithLastUpdateTime(rulesVersion.Total).
			Build()
		rs = append(rs, ruleTwo)
	}
	for _, r := range rs {
		active, err := cache.NewCachedRule(r)
		So(err, ShouldBeNil)
		activeRules = append(activeRules, active)
	}

	configVersion := time.Date(2001, time.January, 2, 0, 0, 0, 1, time.UTC)
	cfgpb := &configpb.Clustering{
		TestNameRules: []*configpb.TestNameClusteringRule{
			{
				Name:         "Test underscore clustering",
				Pattern:      `^(?P<name>\w+)_\w+$`,
				LikeTemplate: `${name}%`,
			},
		},
	}
	if !b.oldConfig {
		configVersion = time.Date(2002, time.January, 2, 0, 0, 0, 1, time.UTC)
		cfgpb = &configpb.Clustering{
			TestNameRules: []*configpb.TestNameClusteringRule{
				{
					Name:         "Test underscore clustering",
					Pattern:      `^(?P<name>\w+)_\w+$`,
					LikeTemplate: `${name}\_%`,
				},
			},
		}
	}

	ruleset := cache.NewRuleset(b.project, activeRules, rulesVersion, time.Time{})
	projectCfg := &configpb.ProjectConfig{
		Clustering:  cfgpb,
		LastUpdated: timestamppb.New(configVersion),
	}
	if b.noConfig {
		projectCfg = config.NewEmptyProject()
		configVersion = projectCfg.LastUpdated.AsTime()
	}
	cfg, err := compiledcfg.NewConfig(projectCfg)
	if err != nil {
		// This should never occur, as the config should be valid.
		panic(err)
	}
	var clusteringState []*state.Entry
	testResultsByObjectID := make(map[string]*cpb.Chunk)
	var bqExports []*bqpb.ClusteredFailureRow
	for i := 0; i < b.chunkCount; i++ {
		trOne := newTestResult(i * 2).withFailureReason(&pb.FailureReason{
			PrimaryErrorMessage: "reason_a",
		}).withTestName("test_a")
		trTwo := newTestResult(i*2 + 1).withFailureReason(&pb.FailureReason{
			PrimaryErrorMessage: "reason_b",
		}).withTestName("test_b")

		cb := newChunk(i).withProject(b.project).
			withOldAlgorithms(b.oldAlgorithms).
			withRuleset(ruleset).
			withConfig(cfg).
			withTestResults(trOne, trTwo)

		s := cb.buildState()
		clusteringState = append(clusteringState, s)
		bqExports = append(bqExports, cb.buildBQExport()...)
		testResultsByObjectID[s.ObjectID] = cb.buildTestResults()
	}
	sortState(clusteringState)
	sortBQExport(bqExports)
	return &scenario{
		config:                cfgpb,
		configVersion:         configVersion,
		rulesVersion:          rulesVersion,
		rules:                 rs,
		testResultsByObjectID: testResultsByObjectID,
		clusteringState:       clusteringState,
		netBQExports:          bqExports,
		noProjectConfig:       b.noConfig,
	}
}

// sortState sorts state.Entry elements in ascending ChunkID order.
func sortState(entries []*state.Entry) {
	sort.Slice(entries, func(i, j int) bool {
		return entries[i].ChunkID < entries[j].ChunkID
	})
}

// sortBQExport sorts BigQuery export rows in ascending key order.
func sortBQExport(rows []*bqpb.ClusteredFailureRow) {
	sort.Slice(rows, func(i, j int) bool {
		return bigQueryKey(rows[i]) < bigQueryKey(rows[j])
	})
}