go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/rbe/reservation_test.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package rbe 16 17 import ( 18 "context" 19 "fmt" 20 "testing" 21 "time" 22 23 "google.golang.org/grpc" 24 "google.golang.org/grpc/codes" 25 "google.golang.org/grpc/status" 26 "google.golang.org/protobuf/proto" 27 "google.golang.org/protobuf/types/known/anypb" 28 "google.golang.org/protobuf/types/known/durationpb" 29 "google.golang.org/protobuf/types/known/emptypb" 30 "google.golang.org/protobuf/types/known/timestamppb" 31 32 "go.chromium.org/luci/common/clock" 33 "go.chromium.org/luci/common/clock/testclock" 34 "go.chromium.org/luci/common/retry/transient" 35 "go.chromium.org/luci/gae/impl/memory" 36 "go.chromium.org/luci/gae/service/datastore" 37 "go.chromium.org/luci/server/tq" 38 39 "go.chromium.org/luci/swarming/internal/remoteworkers" 40 internalspb "go.chromium.org/luci/swarming/proto/internals" 41 "go.chromium.org/luci/swarming/server/model" 42 43 . "github.com/smartystreets/goconvey/convey" 44 . "go.chromium.org/luci/common/testing/assertions" 45 ) 46 47 func TestReservationServer(t *testing.T) { 48 t.Parallel() 49 50 Convey("With mocks", t, func() { 51 const rbeInstance = "projects/x/instances/y" 52 const rbeReservation = "reservation-id" 53 54 ctx := memory.Use(context.Background()) 55 ctx, _ = testclock.UseTime(ctx, testclock.TestRecentTimeUTC) 56 rbe := mockedReservationClient{ 57 newState: remoteworkers.ReservationState_RESERVATION_PENDING, 58 } 59 internals := mockedInternalsClient{} 60 srv := ReservationServer{ 61 rbe: &rbe, 62 internals: &internals, 63 serverVersion: "go-version", 64 } 65 66 expirationTimeout := time.Hour 67 executionTimeout := 10 * time.Minute 68 expiry := clock.Now(ctx).Add(expirationTimeout).UTC() 69 70 enqueueTask := &internalspb.EnqueueRBETask{ 71 Payload: &internalspb.TaskPayload{ 72 ReservationId: rbeReservation, 73 TaskId: "60b2ed0a43023110", 74 TaskToRunShard: 14, 75 TaskToRunId: 1, 76 DebugInfo: &internalspb.TaskPayload_DebugInfo{ 77 PySwarmingVersion: "py-version", 78 }, 79 }, 80 RbeInstance: rbeInstance, 81 Expiry: timestamppb.New(expiry), 82 ExecutionTimeout: durationpb.New(executionTimeout), 83 RequestedBotId: "some-bot-id", 84 Constraints: []*internalspb.EnqueueRBETask_Constraint{ 85 {Key: "key1", AllowedValues: []string{"v1", "v2"}}, 86 {Key: "key2", AllowedValues: []string{"v3"}}, 87 }, 88 Priority: 123, 89 } 90 91 taskReqKey, err := model.TaskIDToRequestKey(ctx, enqueueTask.Payload.TaskId) 92 So(err, ShouldBeNil) 93 taskToRun := &model.TaskToRun{ 94 Key: model.TaskToRunKey(ctx, taskReqKey, 95 enqueueTask.Payload.TaskToRunShard, 96 enqueueTask.Payload.TaskToRunId, 97 ), 98 Expiration: datastore.NewIndexedOptional(expiry), 99 } 100 So(datastore.Put(ctx, taskToRun), ShouldBeNil) 101 102 Convey("handleEnqueueRBETask ok", func() { 103 err := srv.handleEnqueueRBETask(ctx, enqueueTask) 104 So(err, ShouldBeNil) 105 106 expectedPayload, _ := anypb.New(&internalspb.TaskPayload{ 107 ReservationId: rbeReservation, 108 TaskId: "60b2ed0a43023110", 109 TaskToRunShard: 14, 110 TaskToRunId: 1, 111 DebugInfo: &internalspb.TaskPayload_DebugInfo{ 112 PySwarmingVersion: "py-version", 113 GoSwarmingVersion: "go-version", 114 }, 115 }) 116 117 So(rbe.reservation, ShouldResembleProto, &remoteworkers.Reservation{ 118 Name: fmt.Sprintf("%s/reservations/%s", rbeInstance, rbeReservation), 119 State: remoteworkers.ReservationState_RESERVATION_PENDING, 120 Payload: expectedPayload, 121 Constraints: []*remoteworkers.Constraint{ 122 {Key: "label:key1", AllowedValues: []string{"v1", "v2"}}, 123 {Key: "label:key2", AllowedValues: []string{"v3"}}, 124 }, 125 ExpireTime: timestamppb.New(expiry.Add(executionTimeout)), 126 QueuingTimeout: durationpb.New(expirationTimeout), 127 ExecutionTimeout: durationpb.New(executionTimeout), 128 Priority: 123, 129 RequestedBotId: "some-bot-id", 130 }) 131 }) 132 133 Convey("handleEnqueueRBETask TaskToRun is gone", func() { 134 So(datastore.Delete(ctx, datastore.KeyForObj(ctx, taskToRun)), ShouldBeNil) 135 136 err := srv.handleEnqueueRBETask(ctx, enqueueTask) 137 So(err, ShouldBeNil) 138 139 // Didn't call RBE. 140 So(rbe.reservation, ShouldBeNil) 141 }) 142 143 Convey("handleEnqueueRBETask TaskToRun is claimed", func() { 144 taskToRun.Expiration.Unset() 145 So(datastore.Put(ctx, taskToRun), ShouldBeNil) 146 147 err := srv.handleEnqueueRBETask(ctx, enqueueTask) 148 So(err, ShouldBeNil) 149 150 // Didn't call RBE. 151 So(rbe.reservation, ShouldBeNil) 152 }) 153 154 Convey("handleEnqueueRBETask transient err", func() { 155 rbe.errCreate = status.Errorf(codes.Internal, "boom") 156 err := srv.handleEnqueueRBETask(ctx, enqueueTask) 157 So(err, ShouldNotBeNil) 158 So(transient.Tag.In(err), ShouldBeTrue) 159 }) 160 161 Convey("handleEnqueueRBETask already exists", func() { 162 rbe.errCreate = status.Errorf(codes.AlreadyExists, "boom") 163 err := srv.handleEnqueueRBETask(ctx, enqueueTask) 164 So(err, ShouldBeNil) 165 }) 166 167 Convey("handleEnqueueRBETask fatal error", func() { 168 Convey("expected error, report ok", func() { 169 rbe.errCreate = status.Errorf(codes.FailedPrecondition, "boom") 170 internals.expireSlice = func(req *internalspb.ExpireSliceRequest) error { 171 So(req, ShouldResembleProto, &internalspb.ExpireSliceRequest{ 172 TaskId: enqueueTask.Payload.TaskId, 173 TaskToRunShard: enqueueTask.Payload.TaskToRunShard, 174 TaskToRunId: enqueueTask.Payload.TaskToRunId, 175 Reason: internalspb.ExpireSliceRequest_NO_RESOURCE, 176 Details: "rpc error: code = FailedPrecondition desc = boom", 177 }) 178 return nil 179 } 180 err := srv.handleEnqueueRBETask(ctx, enqueueTask) 181 So(tq.Ignore.In(err), ShouldBeTrue) 182 }) 183 184 Convey("unexpected error, report ok", func() { 185 rbe.errCreate = status.Errorf(codes.PermissionDenied, "boom") 186 internals.expireSlice = func(req *internalspb.ExpireSliceRequest) error { 187 So(req, ShouldResembleProto, &internalspb.ExpireSliceRequest{ 188 TaskId: enqueueTask.Payload.TaskId, 189 TaskToRunShard: enqueueTask.Payload.TaskToRunShard, 190 TaskToRunId: enqueueTask.Payload.TaskToRunId, 191 Reason: internalspb.ExpireSliceRequest_PERMISSION_DENIED, 192 Details: "rpc error: code = PermissionDenied desc = boom", 193 }) 194 return nil 195 } 196 err := srv.handleEnqueueRBETask(ctx, enqueueTask) 197 So(tq.Fatal.In(err), ShouldBeTrue) 198 }) 199 200 Convey("expected, report failed", func() { 201 rbe.errCreate = status.Errorf(codes.FailedPrecondition, "boom") 202 internals.expireSlice = func(_ *internalspb.ExpireSliceRequest) error { 203 return status.Errorf(codes.InvalidArgument, "boom") 204 } 205 err := srv.handleEnqueueRBETask(ctx, enqueueTask) 206 So(err, ShouldNotBeNil) 207 So(tq.Ignore.In(err), ShouldBeFalse) 208 So(tq.Fatal.In(err), ShouldBeFalse) 209 }) 210 }) 211 212 Convey("handleCancelRBETask ok", func() { 213 err := srv.handleCancelRBETask(ctx, &internalspb.CancelRBETask{ 214 RbeInstance: rbeInstance, 215 ReservationId: rbeReservation, 216 }) 217 So(err, ShouldBeNil) 218 So(rbe.lastCancel, ShouldResembleProto, &remoteworkers.CancelReservationRequest{ 219 Name: fmt.Sprintf("%s/reservations/%s", rbeInstance, rbeReservation), 220 Intent: remoteworkers.CancelReservationIntent_ANY, 221 }) 222 }) 223 224 Convey("handleCancelRBETask not found", func() { 225 rbe.errCancel = status.Errorf(codes.NotFound, "boo") 226 err := srv.handleCancelRBETask(ctx, &internalspb.CancelRBETask{ 227 RbeInstance: rbeInstance, 228 ReservationId: rbeReservation, 229 }) 230 So(tq.Ignore.In(err), ShouldBeTrue) 231 }) 232 233 Convey("handleCancelRBETask internal", func() { 234 rbe.errCancel = status.Errorf(codes.Internal, "boo") 235 err := srv.handleCancelRBETask(ctx, &internalspb.CancelRBETask{ 236 RbeInstance: rbeInstance, 237 ReservationId: rbeReservation, 238 }) 239 So(transient.Tag.In(err), ShouldBeTrue) 240 }) 241 242 Convey("ExpireSliceBasedOnReservation", func() { 243 const ( 244 reservationName = "projects/.../instances/.../reservations/..." 245 taskSliceIndex = 1 246 taskToRunShard = 5 247 taskToRunID = 678 248 taskID = "637f8e221100aa10" 249 ) 250 251 var ( 252 expireSliceReason internalspb.ExpireSliceRequest_Reason 253 expireSliceDetails string 254 ) 255 internals.expireSlice = func(r *internalspb.ExpireSliceRequest) error { 256 So(r.TaskId, ShouldEqual, taskID) 257 So(r.TaskToRunShard, ShouldEqual, taskToRunShard) 258 So(r.TaskToRunId, ShouldEqual, taskToRunID) 259 So(r.Reason, ShouldNotEqual, internalspb.ExpireSliceRequest_REASON_UNSPECIFIED) 260 expireSliceReason = r.Reason 261 expireSliceDetails = r.Details 262 return nil 263 } 264 265 prepTaskToRun := func(reapable bool) { 266 var exp datastore.Optional[time.Time, datastore.Indexed] 267 if reapable { 268 exp.Set(testclock.TestRecentTimeUTC.Add(time.Hour)) 269 } 270 taskReqKey, _ := model.TaskIDToRequestKey(ctx, taskID) 271 So(datastore.Put(ctx, &model.TaskToRun{ 272 Key: model.TaskToRunKey(ctx, taskReqKey, taskToRunShard, taskToRunID), 273 Expiration: exp, 274 }), ShouldBeNil) 275 } 276 277 prepReapableTaskToRun := func() { prepTaskToRun(true) } 278 prepClaimedTaskToRun := func() { prepTaskToRun(false) } 279 280 expireBasedOnReservation := func(state remoteworkers.ReservationState, statusErr error, result *internalspb.TaskResult) { 281 rbe.reservation = &remoteworkers.Reservation{ 282 Name: reservationName, 283 State: state, 284 Status: status.Convert(statusErr).Proto(), 285 } 286 rbe.reservation.Payload, _ = anypb.New(&internalspb.TaskPayload{ 287 ReservationId: "", 288 TaskId: taskID, 289 SliceIndex: taskSliceIndex, 290 TaskToRunShard: taskToRunShard, 291 TaskToRunId: taskToRunID, 292 }) 293 if result != nil { 294 rbe.reservation.Result, _ = anypb.New(result) 295 } 296 expireSliceReason = internalspb.ExpireSliceRequest_REASON_UNSPECIFIED 297 expireSliceDetails = "" 298 So(srv.ExpireSliceBasedOnReservation(ctx, reservationName), ShouldBeNil) 299 } 300 301 expectNoExpireSlice := func() { 302 So(expireSliceReason, ShouldEqual, internalspb.ExpireSliceRequest_REASON_UNSPECIFIED) 303 } 304 305 expectExpireSlice := func(r internalspb.ExpireSliceRequest_Reason, details string) { 306 So(expireSliceReason, ShouldEqual, r) 307 So(expireSliceDetails, ShouldContainSubstring, details) 308 } 309 310 Convey("Still pending", func() { 311 prepReapableTaskToRun() 312 expireBasedOnReservation( 313 remoteworkers.ReservationState_RESERVATION_PENDING, 314 nil, 315 nil, 316 ) 317 expectNoExpireSlice() 318 }) 319 320 Convey("Successful", func() { 321 prepClaimedTaskToRun() 322 expireBasedOnReservation( 323 remoteworkers.ReservationState_RESERVATION_COMPLETED, 324 nil, 325 &internalspb.TaskResult{}, 326 ) 327 expectNoExpireSlice() 328 }) 329 330 Convey("Canceled #1", func() { 331 prepClaimedTaskToRun() 332 expireBasedOnReservation( 333 remoteworkers.ReservationState_RESERVATION_COMPLETED, 334 status.Errorf(codes.Canceled, "canceled"), 335 nil, 336 ) 337 expectNoExpireSlice() 338 }) 339 340 Convey("Canceled #2", func() { 341 prepClaimedTaskToRun() 342 expireBasedOnReservation( 343 remoteworkers.ReservationState_RESERVATION_CANCELLED, 344 nil, 345 nil, 346 ) 347 expectNoExpireSlice() 348 }) 349 350 Convey("Expired", func() { 351 prepReapableTaskToRun() 352 expireBasedOnReservation( 353 remoteworkers.ReservationState_RESERVATION_COMPLETED, 354 status.Errorf(codes.DeadlineExceeded, "deadline"), 355 nil, 356 ) 357 expectExpireSlice(internalspb.ExpireSliceRequest_EXPIRED, "deadline") 358 }) 359 360 Convey("No resources", func() { 361 prepReapableTaskToRun() 362 expireBasedOnReservation( 363 remoteworkers.ReservationState_RESERVATION_COMPLETED, 364 status.Errorf(codes.FailedPrecondition, "no bots"), 365 nil, 366 ) 367 expectExpireSlice(internalspb.ExpireSliceRequest_NO_RESOURCE, "no bots") 368 }) 369 370 Convey("Bot internal error", func() { 371 prepReapableTaskToRun() 372 expireBasedOnReservation( 373 remoteworkers.ReservationState_RESERVATION_COMPLETED, 374 status.Errorf(codes.DeadlineExceeded, "ignored"), 375 &internalspb.TaskResult{BotInternalError: "boom"}, 376 ) 377 expectExpireSlice(internalspb.ExpireSliceRequest_BOT_INTERNAL_ERROR, "boom") 378 }) 379 380 Convey("Aborted before claimed", func() { 381 prepReapableTaskToRun() 382 expireBasedOnReservation( 383 remoteworkers.ReservationState_RESERVATION_COMPLETED, 384 status.Errorf(codes.Aborted, "bot died"), 385 nil, 386 ) 387 expectExpireSlice(internalspb.ExpireSliceRequest_BOT_INTERNAL_ERROR, "bot died") 388 }) 389 390 Convey("Unexpectedly successful reservations", func() { 391 prepReapableTaskToRun() 392 expireBasedOnReservation( 393 remoteworkers.ReservationState_RESERVATION_COMPLETED, 394 nil, 395 nil, 396 ) 397 expectExpireSlice(internalspb.ExpireSliceRequest_BOT_INTERNAL_ERROR, "unexpectedly finished") 398 }) 399 400 Convey("Unexpectedly canceled reservations", func() { 401 prepReapableTaskToRun() 402 expireBasedOnReservation( 403 remoteworkers.ReservationState_RESERVATION_COMPLETED, 404 status.Errorf(codes.Canceled, "ignored"), 405 nil, 406 ) 407 expectNoExpireSlice() 408 }) 409 410 Convey("Skips already claimed TaskToRun", func() { 411 prepClaimedTaskToRun() 412 expireBasedOnReservation( 413 remoteworkers.ReservationState_RESERVATION_COMPLETED, 414 status.Errorf(codes.FailedPrecondition, "no bots"), 415 nil, 416 ) 417 expectNoExpireSlice() 418 }) 419 420 Convey("Skips missing TaskToRun", func() { 421 expireBasedOnReservation( 422 remoteworkers.ReservationState_RESERVATION_COMPLETED, 423 status.Errorf(codes.FailedPrecondition, "no bots"), 424 nil, 425 ) 426 expectNoExpireSlice() 427 }) 428 }) 429 }) 430 } 431 432 type mockedReservationClient struct { 433 lastCreate *remoteworkers.CreateReservationRequest 434 lastGet *remoteworkers.GetReservationRequest 435 lastCancel *remoteworkers.CancelReservationRequest 436 437 errCreate error 438 errGet error 439 errCancel error 440 441 newState remoteworkers.ReservationState 442 reservation *remoteworkers.Reservation 443 } 444 445 func (m *mockedReservationClient) CreateReservation(ctx context.Context, in *remoteworkers.CreateReservationRequest, opts ...grpc.CallOption) (*remoteworkers.Reservation, error) { 446 m.lastCreate = in 447 m.reservation = proto.Clone(in.Reservation).(*remoteworkers.Reservation) 448 m.reservation.State = m.newState 449 if m.errCreate != nil { 450 return nil, m.errCreate 451 } 452 return m.reservation, nil 453 } 454 455 func (m *mockedReservationClient) GetReservation(ctx context.Context, in *remoteworkers.GetReservationRequest, opts ...grpc.CallOption) (*remoteworkers.Reservation, error) { 456 m.lastGet = in 457 if m.errGet != nil { 458 return nil, m.errGet 459 } 460 return m.reservation, nil 461 } 462 463 func (m *mockedReservationClient) CancelReservation(ctx context.Context, in *remoteworkers.CancelReservationRequest, opts ...grpc.CallOption) (*remoteworkers.CancelReservationResponse, error) { 464 m.lastCancel = in 465 if m.errCancel != nil { 466 return nil, m.errCancel 467 } 468 return &remoteworkers.CancelReservationResponse{}, nil 469 } 470 471 type mockedInternalsClient struct { 472 expireSlice func(*internalspb.ExpireSliceRequest) error 473 } 474 475 func (m *mockedInternalsClient) ExpireSlice(ctx context.Context, in *internalspb.ExpireSliceRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) { 476 if m.expireSlice == nil { 477 panic("must not be called") 478 } 479 if err := m.expireSlice(in); err != nil { 480 return nil, err 481 } 482 return &emptypb.Empty{}, nil 483 }