go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/finalizer/finalizer.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package finalizer
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sync"
    21  
    22  	"cloud.google.com/go/spanner"
    23  	"golang.org/x/sync/errgroup"
    24  	"golang.org/x/sync/semaphore"
    25  	"google.golang.org/protobuf/proto"
    26  
    27  	"go.chromium.org/luci/common/errors"
    28  	"go.chromium.org/luci/common/logging"
    29  	"go.chromium.org/luci/common/sync/parallel"
    30  	"go.chromium.org/luci/resultdb/internal/invocations"
    31  	"go.chromium.org/luci/resultdb/internal/services/baselineupdater"
    32  	"go.chromium.org/luci/resultdb/internal/services/bqexporter"
    33  	"go.chromium.org/luci/resultdb/internal/services/testmetadataupdator"
    34  	"go.chromium.org/luci/resultdb/internal/spanutil"
    35  	"go.chromium.org/luci/resultdb/internal/tasks"
    36  	"go.chromium.org/luci/resultdb/internal/tasks/taskspb"
    37  	"go.chromium.org/luci/resultdb/internal/tracing"
    38  	pb "go.chromium.org/luci/resultdb/proto/v1"
    39  	"go.chromium.org/luci/server"
    40  	"go.chromium.org/luci/server/span"
    41  	"go.chromium.org/luci/server/tq"
    42  )
    43  
    44  // InitServer initializes a finalizer server.
    45  func InitServer(srv *server.Server) {
    46  	// init() below takes care of everything.
    47  }
    48  
    49  func init() {
    50  	tasks.FinalizationTasks.AttachHandler(func(ctx context.Context, msg proto.Message) error {
    51  		task := msg.(*taskspb.TryFinalizeInvocation)
    52  		return tryFinalizeInvocation(ctx, invocations.ID(task.InvocationId))
    53  	})
    54  }
    55  
    56  // Invocation finalization is asynchronous. First, an invocation transitions
    57  // from ACTIVE to FINALIZING state and transactionally an invocation task is
    58  // enqueued to try to transition it from FINALIZING to FINALIZED.
    59  // Then the task tries to finalize the invocation:
    60  // 1. Check if the invocation is ready to be finalized.
    61  // 2. Finalize the invocation.
    62  //
    63  // The invocation is ready to be finalized iff it is in FINALIZING state and it
    64  // does not include, directly or indirectly, an active invocation.
    65  // The latter involves a graph traversal.
    66  // Given that a client cannot mutate inclusions of a FINALIZING/FINALIZED
    67  // invocation, this means that once an invocation is ready to be finalized,
    68  // it cannot become un-ready. This is why the check is done in a read-only
    69  // transaction with minimal contention.
    70  // If the invocation is not ready to finalize, the task is dropped.
    71  // This check is implemented in readyToFinalize() function.
    72  //
    73  // The second part is actual finalization. It is done in a separate read-write
    74  // transaction. First the task checks again if the invocation is still
    75  // FINALIZING. If so, the task changes state to FINALIZED, enqueues BQExport
    76  // tasks and tasks to try to finalize invocations that directly include the
    77  // current one (more about this below).
    78  // The finalization is implemented in finalizeInvocation() function.
    79  //
    80  // If we have a chain of inclusions A includes B, B includes C, where A and B
    81  // are FINALIZING and C is active, then A and B are waiting for C to be
    82  // finalized.
    83  // In this state, tasks attempting to finalize A or B will conclude that they
    84  // are not ready.
    85  // Once C is finalized, a task to try to finalize B is enqueued.
    86  // B gets finalized and it enqueues a task to try to finalize A.
    87  // More generally speaking, whenever a node transitions from FINALIZING to
    88  // FINALIZED, we ping incoming edges. This may cause a chain of pings along
    89  // the edges.
    90  //
    91  // More specifically, given edge (A, B), when finalizing B, A is pinged only if
    92  // it is FINALIZING. It does not make sense to do it if A is FINALIZED for
    93  // obvious reasons; and there is no need to do it if A is ACTIVE because
    94  // a transition ACTIVE->FINALIZING is always accompanied with enqueuing a task
    95  // to try to finalize it.
    96  
    97  // tryFinalizeInvocation finalizes the invocation unless it directly or
    98  // indirectly includes an ACTIVE invocation.
    99  // If the invocation is too early to finalize, logs the reason and returns nil.
   100  // Idempotent.
   101  func tryFinalizeInvocation(ctx context.Context, invID invocations.ID) error {
   102  	// The check whether the invocation is ready to finalize involves traversing
   103  	// the invocation graph and reading Invocations.State column. Doing so in a
   104  	// RW transaction will cause contention. Fortunately, once an invocation
   105  	// is ready to finalize, it cannot go back to being unready, so doing
   106  	// check and finalization in separate transactions is fine.
   107  	switch ready, err := readyToFinalize(ctx, invID); {
   108  	case err != nil:
   109  		return err
   110  
   111  	case !ready:
   112  		return nil
   113  
   114  	default:
   115  		logging.Infof(ctx, "decided to finalize %s...", invID.Name())
   116  		return finalizeInvocation(ctx, invID)
   117  	}
   118  }
   119  
   120  var errAlreadyFinalized = fmt.Errorf("the invocation is already finalized")
   121  
   122  // notReadyToFinalize means the invocation is not ready to finalize.
   123  // It is used exclusively inside readyToFinalize.
   124  var notReadyToFinalize = errors.BoolTag{Key: errors.NewTagKey("not ready to get finalized")}
   125  
   126  // readyToFinalize returns true if the invocation should be finalized.
   127  // An invocation is ready to be finalized if no ACTIVE invocation is reachable
   128  // from it.
   129  func readyToFinalize(ctx context.Context, invID invocations.ID) (ready bool, err error) {
   130  	ctx, ts := tracing.Start(ctx, "resultdb.readyToFinalize")
   131  	defer func() { tracing.End(ts, err) }()
   132  
   133  	ctx, cancel := span.ReadOnlyTransaction(ctx)
   134  	defer cancel()
   135  
   136  	eg, ctx := errgroup.WithContext(ctx)
   137  	defer eg.Wait()
   138  
   139  	// Ensure the root invocation is in FINALIZING state.
   140  	eg.Go(func() error {
   141  		return ensureFinalizing(ctx, invID)
   142  	})
   143  
   144  	// Walk the graph of invocations, starting from the root, along the inclusion
   145  	// edges.
   146  	// Stop walking as soon as we encounter an active invocation.
   147  	seen := make(invocations.IDSet, 1)
   148  	var mu sync.Mutex
   149  
   150  	// Limit the number of concurrent queries.
   151  	sem := semaphore.NewWeighted(64)
   152  
   153  	var visit func(id invocations.ID)
   154  	visit = func(id invocations.ID) {
   155  		// Do not visit same node twice.
   156  		mu.Lock()
   157  		if seen.Has(id) {
   158  			mu.Unlock()
   159  			return
   160  		}
   161  		seen.Add(id)
   162  		mu.Unlock()
   163  
   164  		// Concurrently fetch inclusions without a lock.
   165  		eg.Go(func() error {
   166  			// Limit concurrent Spanner queries.
   167  			if err := sem.Acquire(ctx, 1); err != nil {
   168  				return err
   169  			}
   170  			defer sem.Release(1)
   171  
   172  			// Ignore inclusions of FINALIZED invocations. An ACTIVE invocation is
   173  			// certainly not reachable from those.
   174  			st := spanner.NewStatement(`
   175  				SELECT included.InvocationId, included.State
   176  				FROM IncludedInvocations incl
   177  				JOIN Invocations included on incl.IncludedInvocationId = included.InvocationId
   178  				WHERE incl.InvocationId = @invID AND included.State != @finalized
   179  			`)
   180  			st.Params = spanutil.ToSpannerMap(map[string]any{
   181  				"finalized": pb.Invocation_FINALIZED,
   182  				"invID":     id,
   183  			})
   184  			var b spanutil.Buffer
   185  			return span.Query(ctx, st).Do(func(row *spanner.Row) error {
   186  				var includedID invocations.ID
   187  				var includedState pb.Invocation_State
   188  				switch err := b.FromSpanner(row, &includedID, &includedState); {
   189  				case err != nil:
   190  					return err
   191  
   192  				case includedState == pb.Invocation_ACTIVE:
   193  					return errors.Reason("%s is still ACTIVE", includedID.Name()).Tag(notReadyToFinalize).Err()
   194  
   195  				case includedState != pb.Invocation_FINALIZING:
   196  					return errors.Reason("%s has unexpected state %s", includedID.Name(), includedState).Err()
   197  
   198  				default:
   199  					// The included invocation is FINALIZING and MAY include other
   200  					// still-active invocations. We must go deeper.
   201  					visit(includedID)
   202  					return nil
   203  				}
   204  			})
   205  		})
   206  	}
   207  
   208  	visit(invID)
   209  
   210  	switch err := eg.Wait(); {
   211  	case errors.Unwrap(err) == errAlreadyFinalized:
   212  		// The invocation is already finalized.
   213  		return false, nil
   214  
   215  	case notReadyToFinalize.In(err):
   216  		logging.Infof(ctx, "not ready to finalize: %s", err.Error())
   217  		return false, nil
   218  
   219  	default:
   220  		return err == nil, err
   221  	}
   222  }
   223  
   224  func ensureFinalizing(ctx context.Context, invID invocations.ID) error {
   225  	switch state, err := invocations.ReadState(ctx, invID); {
   226  	case err != nil:
   227  		return err
   228  	case state == pb.Invocation_FINALIZED:
   229  		return errAlreadyFinalized
   230  	case state != pb.Invocation_FINALIZING:
   231  		return errors.Reason("expected %s to be FINALIZING, but it is %s", invID.Name(), state).Err()
   232  	default:
   233  		return nil
   234  	}
   235  }
   236  
   237  // finalizeInvocation updates the invocation state to FINALIZED.
   238  // Enqueues BigQuery export tasks.
   239  // For each FINALIZING invocation that includes the given one, enqueues
   240  // a finalization task.
   241  func finalizeInvocation(ctx context.Context, invID invocations.ID) error {
   242  	_, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error {
   243  		// Check the state before proceeding, so that if the invocation already
   244  		// finalized, we return errAlreadyFinalized.
   245  		if err := ensureFinalizing(ctx, invID); err != nil {
   246  			return err
   247  		}
   248  
   249  		err := parallel.FanOutIn(func(work chan<- func() error) {
   250  			work <- func() error {
   251  				parentInvs, err := parentsInFinalizingState(ctx, invID)
   252  				if err != nil {
   253  					return err
   254  				}
   255  
   256  				// Enqueue tasks to try to finalize invocations that include ours.
   257  				// Note that MustAddTask in a Spanner transaction is essentially
   258  				// a BufferWrite (no RPCs inside), it's fine to call it sequentially
   259  				// and panic on errors.
   260  				for _, id := range parentInvs {
   261  					tq.MustAddTask(ctx, &tq.Task{
   262  						Payload: &taskspb.TryFinalizeInvocation{InvocationId: string(id)},
   263  						Title:   string(id),
   264  					})
   265  				}
   266  
   267  				// Enqueue a notification to pub/sub listeners that the invocation
   268  				// has been finalized.
   269  				realm, err := invocations.ReadRealm(ctx, invID)
   270  				if err != nil {
   271  					return err
   272  				}
   273  
   274  				// Note that this submits the notification transactionally,
   275  				// i.e. conditionally on this transaction committing.
   276  				notification := &pb.InvocationFinalizedNotification{
   277  					Invocation: invID.Name(),
   278  					Realm:      realm,
   279  				}
   280  				tasks.NotifyInvocationFinalized(ctx, notification)
   281  
   282  				// Enqueue update test metadata task transactionally.
   283  				if err := testmetadataupdator.Schedule(ctx, invID); err != nil {
   284  					return err
   285  				}
   286  
   287  				// Enqueue BigQuery exports transactionally.
   288  				return bqexporter.Schedule(ctx, invID)
   289  			}
   290  		})
   291  		if err != nil {
   292  			return err
   293  		}
   294  
   295  		// Update the invocation.
   296  		span.BufferWrite(ctx, spanutil.UpdateMap("Invocations", map[string]any{
   297  			"InvocationId": invID,
   298  			"State":        pb.Invocation_FINALIZED,
   299  			"FinalizeTime": spanner.CommitTimestamp,
   300  		}))
   301  
   302  		if err = scheduleBaselineTask(ctx, invID); err != nil {
   303  			return err
   304  		}
   305  		return nil
   306  	})
   307  	switch {
   308  	case err == errAlreadyFinalized:
   309  		return nil
   310  	case err != nil:
   311  		return err
   312  	default:
   313  		return nil
   314  	}
   315  }
   316  
   317  // parentsInFinalizingState returns IDs of invocations in FINALIZING state that
   318  // directly include ours.
   319  func parentsInFinalizingState(ctx context.Context, invID invocations.ID) (ids []invocations.ID, err error) {
   320  	st := spanner.NewStatement(`
   321  		SELECT including.InvocationId
   322  		FROM IncludedInvocations@{FORCE_INDEX=ReversedIncludedInvocations} incl
   323  		JOIN Invocations including ON incl.InvocationId = including.InvocationId
   324  		WHERE IncludedInvocationId = @invID AND including.State = @finalizing
   325  	`)
   326  	st.Params = spanutil.ToSpannerMap(map[string]any{
   327  		"invID":      invID.RowID(),
   328  		"finalizing": pb.Invocation_FINALIZING,
   329  	})
   330  	err = span.Query(ctx, st).Do(func(row *spanner.Row) error {
   331  		var id invocations.ID
   332  		if err := spanutil.FromSpanner(row, &id); err != nil {
   333  			return err
   334  		}
   335  		ids = append(ids, id)
   336  		return nil
   337  	})
   338  	return ids, err
   339  }
   340  
   341  func scheduleBaselineTask(ctx context.Context, invID invocations.ID) error {
   342  	submitted, err := invocations.ReadSubmitted(ctx, invID)
   343  	if err != nil {
   344  		return err
   345  	}
   346  	if submitted {
   347  		baselineupdater.Schedule(ctx, string(invID))
   348  	}
   349  	return nil
   350  }