go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/reclustering/update.go

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/reclustering/update.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package reclustering
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"time"
    21  
    22  	"go.opentelemetry.io/otel/attribute"
    23  
    24  	"go.chromium.org/luci/common/errors"
    25  	"go.chromium.org/luci/server/caching"
    26  
    27  	"go.chromium.org/luci/analysis/internal/clustering"
    28  	"go.chromium.org/luci/analysis/internal/clustering/algorithms"
    29  	cpb "go.chromium.org/luci/analysis/internal/clustering/proto"
    30  	"go.chromium.org/luci/analysis/internal/clustering/rules/cache"
    31  	"go.chromium.org/luci/analysis/internal/clustering/state"
    32  	"go.chromium.org/luci/analysis/internal/config/compiledcfg"
    33  	"go.chromium.org/luci/analysis/internal/tracing"
    34  )
    35  
// rulesCache is the package-level rules cache that Ruleset reads from.
// It is backed by an LRU cache keyed by string and holding *cache.Ruleset
// values.
//
// NOTE(review): the LRU cache is registered with an argument of 0;
// presumably this means the cache has no size limit — confirm against the
// caching package.
//
// TODO(crbug.com/1243174). Instrument the size of this cache so that we
// can monitor it.
var rulesCache = cache.NewRulesCache(caching.RegisterLRUCache[string, *cache.Ruleset](0))
    39  
    40  // Ruleset returns the cached ruleset for the given project. If a minimum
    41  // version of rule predicates is required, pass it as minimumPredicatesVersion.
    42  // If a strong read is required, pass cache.StrongRead.
    43  // Otherwise, pass rules.StartingEpoch.
    44  func Ruleset(ctx context.Context, project string, minimumPredicatesVersion time.Time) (*cache.Ruleset, error) {
    45  	ruleset, err := rulesCache.Ruleset(ctx, project, minimumPredicatesVersion)
    46  	if err != nil {
    47  		return nil, err
    48  	}
    49  	return ruleset, nil
    50  }
    51  
// Analysis is the interface for cluster analysis.
//
// PendingUpdate.ApplyToAnalysis calls it to export failures whose
// clusters changed as a result of (re-)clustering.
type Analysis interface {
	// HandleUpdatedClusters handles (re-)clustered test results. It is called
	// after the spanner transaction effecting the (re-)clustering has
	// committed. commitTime is the Spanner time the transaction committed.
	//
	// updates carries the project, the chunk ID and the per-failure
	// cluster changes for one chunk. A non-nil error indicates the
	// updates could not be handled.
	HandleUpdatedClusters(ctx context.Context, updates *clustering.Update, commitTime time.Time) error
}
    59  
// PendingUpdate is a (re-)clustering of a chunk of test results
// that has not been applied to Spanner and/or sent for re-analysis
// yet.
//
// Values are produced by PrepareUpdate. The unexported fields hold:
//   - existingState: the state entry that was passed to PrepareUpdate.
//     A zero LastUpdated on it means the chunk is treated as not yet
//     existing in Spanner, so it will be created rather than updated.
//   - newClustering: the clustering computed by PrepareUpdate, which
//     is what gets written to Spanner.
//   - updates: one entry per failure in the chunk whose clusters differ
//     between the previous and the new clustering; this is what gets
//     exported to analysis.
type PendingUpdate struct {
	// Chunk is the identity of the chunk which will be updated.
	Chunk         state.ChunkKey
	existingState *state.Entry
	newClustering clustering.ClusterResults
	updates       []*clustering.FailureUpdate
}
    70  
    71  // PrepareUpdate will (re-)cluster the specific chunk of test results,
    72  // preparing an updated state for Spanner and updates to be exported
    73  // to analysis. The caller can determine how to batch these updates/
    74  // exports together, with help of the Size() method on the returned
    75  // pending update.
    76  //
    77  // If the chunk does not exist in Spanner, pass a *state.Entry
    78  // with project, chunkID, objectID and partitionTime set
    79  // but with LastUpdated set to its zero value. The chunk will be
    80  // clustered for the first time and saved to Spanner.
    81  //
    82  // If the chunk does exist in Spanner, pass the state.Entry read
    83  // from Spanner, along with the test results. The chunk will be
    84  // re-clustered and updated.
    85  func PrepareUpdate(ctx context.Context, ruleset *cache.Ruleset, config *compiledcfg.ProjectConfig, chunk *cpb.Chunk, existingState *state.Entry) (upd *PendingUpdate, err error) {
    86  	_, s := tracing.Start(ctx, "go.chromium.org/luci/analysis/internal/clustering/reclustering.PrepareUpdate",
    87  		attribute.String("project", existingState.Project),
    88  		attribute.String("chunkID", existingState.ChunkID),
    89  	)
    90  	defer func() { tracing.End(s, err) }()
    91  
    92  	exists := !existingState.LastUpdated.IsZero()
    93  	var existingClustering clustering.ClusterResults
    94  	if !exists {
    95  		existingClustering = algorithms.NewEmptyClusterResults(len(chunk.Failures))
    96  	} else {
    97  		if len(existingState.Clustering.Clusters) != len(chunk.Failures) {
    98  			return nil, fmt.Errorf("existing clustering does not match chunk; got clusters for %v test results, want %v", len(existingClustering.Clusters), len(chunk.Failures))
    99  		}
   100  		existingClustering = existingState.Clustering
   101  	}
   102  
   103  	newClustering := algorithms.Cluster(config, ruleset, existingClustering, clustering.FailuresFromProtos(chunk.Failures))
   104  
   105  	updates := prepareClusterUpdates(chunk, existingClustering, newClustering)
   106  
   107  	return &PendingUpdate{
   108  		Chunk:         state.ChunkKey{Project: existingState.Project, ChunkID: existingState.ChunkID},
   109  		existingState: existingState,
   110  		newClustering: newClustering,
   111  		updates:       updates,
   112  	}, nil
   113  }
   114  
   115  // Attempts to apply the update to Spanner.
   116  //
   117  // Important: Before calling this method, the caller should verify the chunks
   118  // in Spanner still have the same LastUpdatedTime as passed to PrepareUpdate,
   119  // in the same transaction as attempting this update.
   120  // This will prevent clobbering a concurrently applied update or create.
   121  //
   122  // In case of an update race, PrepareUpdate should be retried with a more
   123  // recent version of the chunk.
   124  func (p *PendingUpdate) ApplyToSpanner(ctx context.Context) error {
   125  	exists := !p.existingState.LastUpdated.IsZero()
   126  	if !exists {
   127  		clusterState := &state.Entry{
   128  			Project:       p.existingState.Project,
   129  			ChunkID:       p.existingState.ChunkID,
   130  			PartitionTime: p.existingState.PartitionTime,
   131  			ObjectID:      p.existingState.ObjectID,
   132  			Clustering:    p.newClustering,
   133  		}
   134  		if err := state.Create(ctx, clusterState); err != nil {
   135  			return err
   136  		}
   137  	} else {
   138  		if err := state.UpdateClustering(ctx, p.existingState, &p.newClustering); err != nil {
   139  			return err
   140  		}
   141  	}
   142  	return nil
   143  }
   144  
   145  // ApplyToAnalysis exports changed failures for re-analysis. The
   146  // Spanner commit time must be provided so that analysis has the
   147  // correct update chronology.
   148  func (p *PendingUpdate) ApplyToAnalysis(ctx context.Context, analysis Analysis, commitTime time.Time) error {
   149  	if len(p.updates) > 0 {
   150  		update := &clustering.Update{
   151  			Project: p.existingState.Project,
   152  			ChunkID: p.existingState.ChunkID,
   153  			Updates: p.updates,
   154  		}
   155  		if err := analysis.HandleUpdatedClusters(ctx, update, commitTime); err != nil {
   156  			return errors.Annotate(err, "handle updated clusters (project: %s chunkID: %s)", p.existingState.Project, p.existingState.ChunkID).Err()
   157  		}
   158  	}
   159  	return nil
   160  }
   161  
   162  // EstimatedTransactionSize returns the estimated size of the
   163  // Spanner transaction, in bytes.
   164  func (p *PendingUpdate) EstimatedTransactionSize() int {
   165  	if len(p.updates) > 0 {
   166  		// This means we will be updating the clustering state in Spanner,
   167  		// not just the Version fields.
   168  		numClusters := 0
   169  		for _, cs := range p.newClustering.Clusters {
   170  			numClusters += len(cs)
   171  		}
   172  		// Est. 10 bytes per cluster, plus 200 bytes overhead.
   173  		return 200 + numClusters*10
   174  	}
   175  	// The clustering state has not changed, only
   176  	// AlgorithmsVersion and RulesVersion will be updated.
   177  	return 200
   178  }
   179  
// FailuresUpdated returns the number of failures that will be
// exported for re-analysis as a result of the update, i.e. the
// number of failures whose clusters changed.
func (p *PendingUpdate) FailuresUpdated() int {
	return len(p.updates)
}
   185  
   186  func prepareClusterUpdates(chunk *cpb.Chunk, previousClustering clustering.ClusterResults, newClustering clustering.ClusterResults) []*clustering.FailureUpdate {
   187  	var updates []*clustering.FailureUpdate
   188  	for i, testResult := range chunk.Failures {
   189  		previousClusters := previousClustering.Clusters[i]
   190  		newClusters := newClustering.Clusters[i]
   191  
   192  		if !clustering.ClustersEqual(previousClusters, newClusters) {
   193  			update := &clustering.FailureUpdate{
   194  				TestResult:       testResult,
   195  				PreviousClusters: previousClusters,
   196  				NewClusters:      newClusters,
   197  			}
   198  			updates = append(updates, update)
   199  		}
   200  	}
   201  	return updates
   202  }