go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/reclustering/updates.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package reclustering
    16  
    17  import (
    18  	"context"
    19  	"time"
    20  
    21  	"go.chromium.org/luci/common/clock"
    22  	"go.chromium.org/luci/common/errors"
    23  	"go.chromium.org/luci/server/span"
    24  
    25  	"go.chromium.org/luci/analysis/internal/clustering/state"
    26  	"go.chromium.org/luci/analysis/internal/tracing"
    27  )
    28  
    29  const (
    30  	// Maximum size of pending Spanner transactions, in bytes. Once
    31  	// a transaction meets or exceeds this size, it will be committed.
    32  	maxTransactionBytes = 1000 * 1000
    33  	// Maximum number of failures to export to analysis at a time.
    34  	// Once the failures to be exported meets or exceeds this size, they will
    35  	// be committed.
    36  	maxAnalysisSize = 1000
    37  	// Maximum amount of time between Spanner commits. If no updates have
    38  	// been committed to Spanner for this time, a commit will be made at
    39  	// the next earliest opportunity. This limits how much work can be
    40  	// lost in case of an error or an update conflict.
    41  	maxPendingTime = 2 * time.Second
    42  )
    43  
// UpdateRaceErr is the sentinel error returned by PendingUpdates.Apply when
// a concurrent modification (or deletion) of a chunk is detected, i.e. when
// the chunk's last-updated time read inside the transaction differs from the
// one the pending update was prepared against.
var UpdateRaceErr = errors.New("concurrent modification to cluster")
    47  
// PendingUpdates represents a pending set of chunk updates. It facilitates
// batching updates together for efficiency.
//
// Fields:
//   - updates: the queued updates, in the order they were passed to Add.
//   - pendingTransactionBytes: running sum of EstimatedTransactionSize()
//     over the queued updates.
//   - pendingAnalysisSize: running sum of FailuresUpdated() over the queued
//     updates.
//   - lastCommit: reference time used by ShouldApply for the time limit;
//     set to the current time by NewPendingUpdates.
type PendingUpdates struct {
	updates                 []*PendingUpdate
	pendingTransactionBytes int
	pendingAnalysisSize     int
	lastCommit              time.Time
}
    56  
    57  // NewPendingUpdates initialises a new PendingUpdates.
    58  func NewPendingUpdates(ctx context.Context) *PendingUpdates {
    59  	return &PendingUpdates{
    60  		updates:                 nil,
    61  		pendingTransactionBytes: 0,
    62  		pendingAnalysisSize:     0,
    63  		lastCommit:              clock.Now(ctx),
    64  	}
    65  }
    66  
    67  // Add adds the specified update to the set of pending updates.
    68  func (p *PendingUpdates) Add(update *PendingUpdate) {
    69  	p.updates = append(p.updates, update)
    70  	p.pendingTransactionBytes += update.EstimatedTransactionSize()
    71  	p.pendingAnalysisSize += update.FailuresUpdated()
    72  }
    73  
    74  // ShouldApply returns whether the updates should be applied now because
    75  // they have reached a maximum size or time limit.
    76  func (p *PendingUpdates) ShouldApply(ctx context.Context) bool {
    77  	return p.pendingTransactionBytes > maxTransactionBytes ||
    78  		p.pendingAnalysisSize > maxAnalysisSize ||
    79  		clock.Now(ctx).Sub(p.lastCommit) > maxPendingTime
    80  }
    81  
    82  // Apply applies the chunk updates to Spanner and exports them for re-analysis.
    83  // If some applications failed because of a concurrent modification, the method
    84  // returns UpdateRaceErr. In this case, the caller should construct the updates
    85  // again from a fresh read of the Clustering State and retry.
    86  // Note that some of the updates may have successfully applied.
    87  func (p *PendingUpdates) Apply(ctx context.Context, analysis Analysis) (err error) {
    88  	ctx, s := tracing.Start(ctx, "go.chromium.org/luci/analysis/internal/clustering/reclustering.Apply")
    89  	defer func() { tracing.End(s, err) }()
    90  
    91  	var appliedUpdates []*PendingUpdate
    92  	f := func(ctx context.Context) error {
    93  		var keys []state.ChunkKey
    94  		for _, pu := range p.updates {
    95  			keys = append(keys, pu.Chunk)
    96  		}
    97  		lastUpdated, err := state.ReadLastUpdated(ctx, keys)
    98  		if err != nil {
    99  			return errors.Annotate(err, "read last updated").Err()
   100  		}
   101  
   102  		appliedUpdates = nil
   103  		for i, pu := range p.updates {
   104  			actualLastUpdated := lastUpdated[i]
   105  			expectedLastUpdated := pu.existingState.LastUpdated
   106  			if !expectedLastUpdated.Equal(actualLastUpdated) {
   107  				// Our update raced with another update.
   108  				continue
   109  			}
   110  			if err := pu.ApplyToSpanner(ctx); err != nil {
   111  				return errors.Annotate(err, "apply to spanner").Err()
   112  			}
   113  			appliedUpdates = append(appliedUpdates, pu)
   114  		}
   115  		return nil
   116  	}
   117  	commitTime, err := span.ReadWriteTransaction(ctx, f)
   118  	if err != nil {
   119  		return err
   120  	}
   121  	for _, pu := range appliedUpdates {
   122  		if err := pu.ApplyToAnalysis(ctx, analysis, commitTime); err != nil {
   123  			return errors.Annotate(err, "export analysis").Err()
   124  		}
   125  	}
   126  	if len(appliedUpdates) != len(p.updates) {
   127  		// One more more updates raced.
   128  		return UpdateRaceErr
   129  	}
   130  	return nil
   131  }