go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/finalizer/finalizer.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package finalizer 16 17 import ( 18 "context" 19 "fmt" 20 "sync" 21 22 "cloud.google.com/go/spanner" 23 "golang.org/x/sync/errgroup" 24 "golang.org/x/sync/semaphore" 25 "google.golang.org/protobuf/proto" 26 27 "go.chromium.org/luci/common/errors" 28 "go.chromium.org/luci/common/logging" 29 "go.chromium.org/luci/common/sync/parallel" 30 "go.chromium.org/luci/resultdb/internal/invocations" 31 "go.chromium.org/luci/resultdb/internal/services/baselineupdater" 32 "go.chromium.org/luci/resultdb/internal/services/bqexporter" 33 "go.chromium.org/luci/resultdb/internal/services/testmetadataupdator" 34 "go.chromium.org/luci/resultdb/internal/spanutil" 35 "go.chromium.org/luci/resultdb/internal/tasks" 36 "go.chromium.org/luci/resultdb/internal/tasks/taskspb" 37 "go.chromium.org/luci/resultdb/internal/tracing" 38 pb "go.chromium.org/luci/resultdb/proto/v1" 39 "go.chromium.org/luci/server" 40 "go.chromium.org/luci/server/span" 41 "go.chromium.org/luci/server/tq" 42 ) 43 44 // InitServer initializes a finalizer server. 45 func InitServer(srv *server.Server) { 46 // init() below takes care of everything. 47 } 48 49 func init() { 50 tasks.FinalizationTasks.AttachHandler(func(ctx context.Context, msg proto.Message) error { 51 task := msg.(*taskspb.TryFinalizeInvocation) 52 return tryFinalizeInvocation(ctx, invocations.ID(task.InvocationId)) 53 }) 54 } 55 56 // Invocation finalization is asynchronous. First, an invocation transitions 57 // from ACTIVE to FINALIZING state and transactionally an invocation task is 58 // enqueued to try to transition it from FINALIZING to FINALIZED. 59 // Then the task tries to finalize the invocation: 60 // 1. Check if the invocation is ready to be finalized. 61 // 2. Finalize the invocation. 62 // 63 // The invocation is ready to be finalized iff it is in FINALIZING state and it 64 // does not include, directly or indirectly, an active invocation. 65 // The latter involves a graph traversal. 66 // Given that a client cannot mutate inclusions of a FINALIZING/FINALIZED 67 // invocation, this means that once an invocation is ready to be finalized, 68 // it cannot become un-ready. This is why the check is done in a ready-only 69 // transaction with minimal contention. 70 // If the invocation is not ready to finalize, the task is dropped. 71 // This check is implemented in readyToFinalize() function. 72 // 73 // The second part is actual finalization. It is done in a separate read-write 74 // transaction. First the task checks again if the invocation is still 75 // FINALIZING. If so, the task changes state to FINALIZED, enqueues BQExport 76 // tasks and tasks to try to finalize invocations that directly include the 77 // current one (more about this below). 78 // The finalization is implemented in finalizeInvocation() function. 79 // 80 // If we have a chain of inclusions A includes B, B includes C, where A and B 81 // are FINALIZING and C is active, then A and B are waiting for C to be 82 // finalized. 83 // In this state, tasks attempting to finalize A or B will conclude that they 84 // are not ready. 85 // Once C is finalized, a task to try to finalize B is enqueued. 86 // B gets finalized and it enqueues a task to try to finalize A. 87 // More generally speaking, whenever a node transitions from FINALIZING to 88 // FINALIZED, we ping incoming edges. This may cause a chain of pings along 89 // the edges. 90 // 91 // More specifically, given edge (A, B), when finalizing B, A is pinged only if 92 // it is FINALIZING. It does not make sense to do it if A is FINALIZED for 93 // obvious reasons; and there is no need to do it if A is ACTIVE because 94 // a transition ACTIVE->FINALIZING is always accompanied with enqueuing a task 95 // to try to finalize it. 96 97 // tryFinalizeInvocation finalizes the invocation unless it directly or 98 // indirectly includes an ACTIVE invocation. 99 // If the invocation is too early to finalize, logs the reason and returns nil. 100 // Idempotent. 101 func tryFinalizeInvocation(ctx context.Context, invID invocations.ID) error { 102 // The check whether the invocation is ready to finalize involves traversing 103 // the invocation graph and reading Invocations.State column. Doing so in a 104 // RW transaction will cause contention. Fortunately, once an invocation 105 // is ready to finalize, it cannot go back to being unready, so doing 106 // check and finalization in separate transactions is fine. 107 switch ready, err := readyToFinalize(ctx, invID); { 108 case err != nil: 109 return err 110 111 case !ready: 112 return nil 113 114 default: 115 logging.Infof(ctx, "decided to finalize %s...", invID.Name()) 116 return finalizeInvocation(ctx, invID) 117 } 118 } 119 120 var errAlreadyFinalized = fmt.Errorf("the invocation is already finalized") 121 122 // notReadyToFinalize means the invocation is not ready to finalize. 123 // It is used exclusively inside readyToFinalize. 124 var notReadyToFinalize = errors.BoolTag{Key: errors.NewTagKey("not ready to get finalized")} 125 126 // readyToFinalize returns true if the invocation should be finalized. 127 // An invocation is ready to be finalized if no ACTIVE invocation is reachable 128 // from it. 129 func readyToFinalize(ctx context.Context, invID invocations.ID) (ready bool, err error) { 130 ctx, ts := tracing.Start(ctx, "resultdb.readyToFinalize") 131 defer func() { tracing.End(ts, err) }() 132 133 ctx, cancel := span.ReadOnlyTransaction(ctx) 134 defer cancel() 135 136 eg, ctx := errgroup.WithContext(ctx) 137 defer eg.Wait() 138 139 // Ensure the root invocation is in FINALIZING state. 140 eg.Go(func() error { 141 return ensureFinalizing(ctx, invID) 142 }) 143 144 // Walk the graph of invocations, starting from the root, along the inclusion 145 // edges. 146 // Stop walking as soon as we encounter an active invocation. 147 seen := make(invocations.IDSet, 1) 148 var mu sync.Mutex 149 150 // Limit the number of concurrent queries. 151 sem := semaphore.NewWeighted(64) 152 153 var visit func(id invocations.ID) 154 visit = func(id invocations.ID) { 155 // Do not visit same node twice. 156 mu.Lock() 157 if seen.Has(id) { 158 mu.Unlock() 159 return 160 } 161 seen.Add(id) 162 mu.Unlock() 163 164 // Concurrently fetch inclusions without a lock. 165 eg.Go(func() error { 166 // Limit concurrent Spanner queries. 167 if err := sem.Acquire(ctx, 1); err != nil { 168 return err 169 } 170 defer sem.Release(1) 171 172 // Ignore inclusions of FINALIZED invocations. An ACTIVE invocation is 173 // certainly not reachable from those. 174 st := spanner.NewStatement(` 175 SELECT included.InvocationId, included.State 176 FROM IncludedInvocations incl 177 JOIN Invocations included on incl.IncludedInvocationId = included.InvocationId 178 WHERE incl.InvocationId = @invID AND included.State != @finalized 179 `) 180 st.Params = spanutil.ToSpannerMap(map[string]any{ 181 "finalized": pb.Invocation_FINALIZED, 182 "invID": id, 183 }) 184 var b spanutil.Buffer 185 return span.Query(ctx, st).Do(func(row *spanner.Row) error { 186 var includedID invocations.ID 187 var includedState pb.Invocation_State 188 switch err := b.FromSpanner(row, &includedID, &includedState); { 189 case err != nil: 190 return err 191 192 case includedState == pb.Invocation_ACTIVE: 193 return errors.Reason("%s is still ACTIVE", includedID.Name()).Tag(notReadyToFinalize).Err() 194 195 case includedState != pb.Invocation_FINALIZING: 196 return errors.Reason("%s has unexpected state %s", includedID.Name(), includedState).Err() 197 198 default: 199 // The included invocation is FINALIZING and MAY include other 200 // still-active invocations. We must go deeper. 201 visit(includedID) 202 return nil 203 } 204 }) 205 }) 206 } 207 208 visit(invID) 209 210 switch err := eg.Wait(); { 211 case errors.Unwrap(err) == errAlreadyFinalized: 212 // The invocation is already finalized. 213 return false, nil 214 215 case notReadyToFinalize.In(err): 216 logging.Infof(ctx, "not ready to finalize: %s", err.Error()) 217 return false, nil 218 219 default: 220 return err == nil, err 221 } 222 } 223 224 func ensureFinalizing(ctx context.Context, invID invocations.ID) error { 225 switch state, err := invocations.ReadState(ctx, invID); { 226 case err != nil: 227 return err 228 case state == pb.Invocation_FINALIZED: 229 return errAlreadyFinalized 230 case state != pb.Invocation_FINALIZING: 231 return errors.Reason("expected %s to be FINALIZING, but it is %s", invID.Name(), state).Err() 232 default: 233 return nil 234 } 235 } 236 237 // finalizeInvocation updates the invocation state to FINALIZED. 238 // Enqueues BigQuery export tasks. 239 // For each FINALIZING invocation that includes the given one, enqueues 240 // a finalization task. 241 func finalizeInvocation(ctx context.Context, invID invocations.ID) error { 242 _, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error { 243 // Check the state before proceeding, so that if the invocation already 244 // finalized, we return errAlreadyFinalized. 245 if err := ensureFinalizing(ctx, invID); err != nil { 246 return err 247 } 248 249 err := parallel.FanOutIn(func(work chan<- func() error) { 250 work <- func() error { 251 parentInvs, err := parentsInFinalizingState(ctx, invID) 252 if err != nil { 253 return err 254 } 255 256 // Enqueue tasks to try to finalize invocations that include ours. 257 // Note that MustAddTask in a Spanner transaction is essentially 258 // a BufferWrite (no RPCs inside), it's fine to call it sequentially 259 // and panic on errors. 260 for _, id := range parentInvs { 261 tq.MustAddTask(ctx, &tq.Task{ 262 Payload: &taskspb.TryFinalizeInvocation{InvocationId: string(id)}, 263 Title: string(id), 264 }) 265 } 266 267 // Enqueue a notification to pub/sub listeners that the invocation 268 // has been finalized. 269 realm, err := invocations.ReadRealm(ctx, invID) 270 if err != nil { 271 return err 272 } 273 274 // Note that this submits the notification transactionally, 275 // i.e. conditionally on this transaction committing. 276 notification := &pb.InvocationFinalizedNotification{ 277 Invocation: invID.Name(), 278 Realm: realm, 279 } 280 tasks.NotifyInvocationFinalized(ctx, notification) 281 282 // Enqueue update test metadata task transactionally. 283 if err := testmetadataupdator.Schedule(ctx, invID); err != nil { 284 return err 285 } 286 287 // Enqueue BigQuery exports transactionally. 288 return bqexporter.Schedule(ctx, invID) 289 } 290 }) 291 if err != nil { 292 return err 293 } 294 295 // Update the invocation. 296 span.BufferWrite(ctx, spanutil.UpdateMap("Invocations", map[string]any{ 297 "InvocationId": invID, 298 "State": pb.Invocation_FINALIZED, 299 "FinalizeTime": spanner.CommitTimestamp, 300 })) 301 302 if err = scheduleBaselineTask(ctx, invID); err != nil { 303 return err 304 } 305 return nil 306 }) 307 switch { 308 case err == errAlreadyFinalized: 309 return nil 310 case err != nil: 311 return err 312 default: 313 return nil 314 } 315 } 316 317 // parentsInFinalizingState returns IDs of invocations in FINALIZING state that 318 // directly include ours. 319 func parentsInFinalizingState(ctx context.Context, invID invocations.ID) (ids []invocations.ID, err error) { 320 st := spanner.NewStatement(` 321 SELECT including.InvocationId 322 FROM IncludedInvocations@{FORCE_INDEX=ReversedIncludedInvocations} incl 323 JOIN Invocations including ON incl.InvocationId = including.InvocationId 324 WHERE IncludedInvocationId = @invID AND including.State = @finalizing 325 `) 326 st.Params = spanutil.ToSpannerMap(map[string]any{ 327 "invID": invID.RowID(), 328 "finalizing": pb.Invocation_FINALIZING, 329 }) 330 err = span.Query(ctx, st).Do(func(row *spanner.Row) error { 331 var id invocations.ID 332 if err := spanutil.FromSpanner(row, &id); err != nil { 333 return err 334 } 335 ids = append(ids, id) 336 return nil 337 }) 338 return ids, err 339 } 340 341 func scheduleBaselineTask(ctx context.Context, invID invocations.ID) error { 342 submitted, err := invocations.ReadSubmitted(ctx, invID) 343 if err != nil { 344 return err 345 } 346 if submitted { 347 baselineupdater.Schedule(ctx, string(invID)) 348 } 349 return nil 350 }