go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/buildbucket/appengine/internal/buildcron/expired_test.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package buildcron
    16  
    17  import (
    18  	"context"
    19  	"sort"
    20  	"testing"
    21  	"time"
    22  
    23  	"go.chromium.org/luci/common/clock/testclock"
    24  	"go.chromium.org/luci/common/tsmon"
    25  	"go.chromium.org/luci/common/tsmon/store"
    26  	"go.chromium.org/luci/gae/filter/txndefer"
    27  	"go.chromium.org/luci/gae/impl/memory"
    28  	"go.chromium.org/luci/gae/service/datastore"
    29  	"go.chromium.org/luci/server/tq"
    30  	"go.chromium.org/luci/server/tq/tqtesting"
    31  
    32  	"go.chromium.org/luci/buildbucket/appengine/internal/buildid"
    33  	"go.chromium.org/luci/buildbucket/appengine/internal/metrics"
    34  	"go.chromium.org/luci/buildbucket/appengine/model"
    35  	taskdefs "go.chromium.org/luci/buildbucket/appengine/tasks/defs"
    36  	pb "go.chromium.org/luci/buildbucket/proto"
    37  
    38  	. "github.com/smartystreets/goconvey/convey"
    39  	. "go.chromium.org/luci/common/testing/assertions"
    40  )
    41  
    42  // now needs to be further fresh enough from buildid.beginningOfTheWorld
    43  var now = time.Date(2020, 01, 01, 0, 0, 0, 0, time.UTC)
    44  
    45  func setUp() (context.Context, store.Store, *tqtesting.Scheduler) {
    46  	ctx := memory.Use(context.Background())
    47  	datastore.GetTestable(ctx).AutoIndex(true)
    48  	datastore.GetTestable(ctx).Consistent(true)
    49  	ctx = txndefer.FilterRDS(ctx)
    50  
    51  	ctx, _ = tsmon.WithDummyInMemory(ctx)
    52  	ctx = metrics.WithServiceInfo(ctx, "svc", "job", "ins")
    53  	ctx = metrics.WithBuilder(ctx, "project", "bucket", "builder")
    54  	store := tsmon.Store(ctx)
    55  	ctx, sch := tq.TestingContext(ctx, nil)
    56  
    57  	ctx, _ = testclock.UseTime(ctx, now)
    58  	return ctx, store, sch
    59  }
    60  
    61  func newBuildAndStatus(ctx context.Context, st pb.Status, t time.Time) (*model.Build, *model.BuildStatus) {
    62  	id := buildid.NewBuildIDs(ctx, t, 1)[0]
    63  	b := &model.Build{
    64  		ID: id,
    65  		Proto: &pb.Build{
    66  			Id: id,
    67  			Builder: &pb.BuilderID{
    68  				Project: "project",
    69  				Bucket:  "bucket",
    70  				Builder: "builder",
    71  			},
    72  			Status: st,
    73  		},
    74  	}
    75  	bs := &model.BuildStatus{
    76  		Build:  datastore.KeyForObj(ctx, b),
    77  		Status: st,
    78  	}
    79  	return b, bs
    80  }
    81  
    82  func TestResetExpiredLeases(t *testing.T) {
    83  	t.Parallel()
    84  
    85  	getBuild := func(ctx context.Context, bid int64) *model.Build {
    86  		b := &model.Build{ID: bid}
    87  		So(datastore.Get(ctx, b), ShouldBeNil)
    88  		return b
    89  	}
    90  
    91  	Convey("ResetExpiredLeases", t, func() {
    92  		ctx, store, sch := setUp()
    93  		createTime := now.Add(-time.Hour)
    94  		_, _ = store, sch
    95  		Convey("skips non-expired leases", func() {
    96  			b1, bs1 := newBuildAndStatus(ctx, pb.Status_SCHEDULED, createTime)
    97  			b2, bs2 := newBuildAndStatus(ctx, pb.Status_STARTED, createTime)
    98  			b2.IsLeased = true
    99  			b2.LeaseExpirationDate = now.Add(time.Hour)
   100  
   101  			So(datastore.Put(ctx, b1, b2, bs1, bs2), ShouldBeNil)
   102  			So(ResetExpiredLeases(ctx), ShouldBeNil)
   103  
   104  			b1 = getBuild(ctx, b1.ID)
   105  			So(b1.IsLeased, ShouldBeFalse)
   106  			So(b1.LeaseExpirationDate.IsZero(), ShouldBeTrue)
   107  			b2 = getBuild(ctx, b2.ID)
   108  			So(b2.IsLeased, ShouldBeTrue)
   109  			So(b2.LeaseExpirationDate, ShouldEqual, now.Add(time.Hour))
   110  			bs2 = &model.BuildStatus{Build: datastore.KeyForObj(ctx, b2)}
   111  			So(datastore.Get(ctx, bs2), ShouldBeNil)
   112  			So(bs2.Status, ShouldEqual, pb.Status_STARTED)
   113  		})
   114  
   115  		Convey("works w/ a large number of expired leases", func() {
   116  			bs := make([]*model.Build, 128)
   117  			bss := make([]*model.BuildStatus, len(bs))
   118  			for i := 0; i < len(bs); i++ {
   119  				bs[i], bss[i] = newBuildAndStatus(ctx, pb.Status_INFRA_FAILURE, createTime)
   120  				bs[i].IsLeased = true
   121  				bs[i].LeaseExpirationDate = now.Add(-time.Hour)
   122  			}
   123  			So(datastore.Put(ctx, bs), ShouldBeNil)
   124  			So(datastore.Put(ctx, bss), ShouldBeNil)
   125  			So(ResetExpiredLeases(ctx), ShouldBeNil)
   126  		})
   127  
   128  		Convey("resets expired, terminated leases", func() {
   129  			b, bs := newBuildAndStatus(ctx, pb.Status_INFRA_FAILURE, createTime)
   130  			b.IsLeased = true
   131  			b.LeaseExpirationDate = now.Add(-time.Hour)
   132  
   133  			So(datastore.Put(ctx, b, bs), ShouldBeNil)
   134  			So(ResetExpiredLeases(ctx), ShouldBeNil)
   135  
   136  			b = getBuild(ctx, b.ID)
   137  			So(b.IsLeased, ShouldBeFalse)
   138  			So(b.LeaseExpirationDate.IsZero(), ShouldBeTrue)
   139  			So(b.Status, ShouldEqual, pb.Status_INFRA_FAILURE)
   140  
   141  			Convey("reports metrics", func() {
   142  				fv := []any{
   143  					"luci.project.bucket",    /* metric:bucket */
   144  					"builder",                /* metric:builder */
   145  					model.Completed.String(), /* metric:status */
   146  				}
   147  				So(store.Get(ctx, metrics.V1.ExpiredLeaseReset, time.Time{}, fv), ShouldEqual, 1)
   148  			})
   149  		})
   150  
   151  		Convey("resets expired, non-terminated leases", func() {
   152  			// don't save BuildStatus to make sure it still works when BuildStatus
   153  			// doesn't exist.
   154  			b, _ := newBuildAndStatus(ctx, pb.Status_STARTED, createTime)
   155  			b.IsLeased = true
   156  			b.LeaseExpirationDate = now.Add(-time.Hour)
   157  
   158  			So(datastore.Put(ctx, b), ShouldBeNil)
   159  			So(ResetExpiredLeases(ctx), ShouldBeNil)
   160  
   161  			b = getBuild(ctx, b.ID)
   162  			So(b.IsLeased, ShouldBeFalse)
   163  			So(b.LeaseExpirationDate.IsZero(), ShouldBeTrue)
   164  			So(b.Status, ShouldEqual, pb.Status_SCHEDULED)
   165  
   166  			Convey("reports metrics", func() {
   167  				fv := []any{
   168  					"luci.project.bucket",    /* metric:bucket */
   169  					"builder",                /* metric:builder */
   170  					model.Scheduled.String(), /* metric:status */
   171  				}
   172  				So(store.Get(ctx, metrics.V1.ExpiredLeaseReset, time.Time{}, fv), ShouldEqual, 1)
   173  			})
   174  
   175  			Convey("adds TQ tasks", func() {
   176  				tasks := sch.Tasks()
   177  				notifyIDs := []int64{}
   178  				notifyGoIDs := []int64{}
   179  
   180  				for _, task := range tasks {
   181  					switch v := task.Payload.(type) {
   182  					case *taskdefs.NotifyPubSub:
   183  						notifyIDs = append(notifyIDs, v.GetBuildId())
   184  					case *taskdefs.NotifyPubSubGoProxy:
   185  						notifyGoIDs = append(notifyGoIDs, v.GetBuildId())
   186  					default:
   187  						panic("invalid task payload")
   188  					}
   189  				}
   190  				So(notifyGoIDs, ShouldResemble, []int64{b.ID})
   191  			})
   192  		})
   193  	})
   194  }
   195  
   196  func TestTimeoutExpiredBuilds(t *testing.T) {
   197  	t.Parallel()
   198  
   199  	Convey("TimeoutExpiredBuilds", t, func() {
   200  		ctx, store, sch := setUp()
   201  
   202  		Convey("skips young, running builds", func() {
   203  			b1, bs1 := newBuildAndStatus(ctx, pb.Status_SCHEDULED, now.Add(-model.BuildMaxCompletionTime))
   204  			b2, bs2 := newBuildAndStatus(ctx, pb.Status_STARTED, now.Add(-model.BuildMaxCompletionTime))
   205  			So(datastore.Put(ctx, b1, b2, bs1, bs2), ShouldBeNil)
   206  			So(TimeoutExpiredBuilds(ctx), ShouldBeNil)
   207  
   208  			b := &model.Build{ID: b1.ID}
   209  			bs := &model.BuildStatus{Build: datastore.KeyForObj(ctx, b)}
   210  			So(datastore.Get(ctx, b, bs), ShouldBeNil)
   211  			So(b.Proto, ShouldResembleProto, b1.Proto)
   212  			So(bs.Status, ShouldEqual, pb.Status_SCHEDULED)
   213  
   214  			b = &model.Build{ID: b2.ID}
   215  			bs = &model.BuildStatus{Build: datastore.KeyForObj(ctx, b)}
   216  			So(datastore.Get(ctx, b, bs), ShouldBeNil)
   217  			So(b.Proto, ShouldResembleProto, b2.Proto)
   218  			So(bs.Status, ShouldEqual, pb.Status_STARTED)
   219  		})
   220  
   221  		Convey("skips old, completed builds", func() {
   222  			b1, bs1 := newBuildAndStatus(ctx, pb.Status_SUCCESS, now.Add(-model.BuildMaxCompletionTime))
   223  			b2, bs2 := newBuildAndStatus(ctx, pb.Status_FAILURE, now.Add(-model.BuildMaxCompletionTime))
   224  			So(datastore.Put(ctx, b1, b2, bs1, bs2), ShouldBeNil)
   225  			So(TimeoutExpiredBuilds(ctx), ShouldBeNil)
   226  
   227  			b := &model.Build{ID: b1.ID}
   228  			So(datastore.Get(ctx, b), ShouldBeNil)
   229  			So(b.Proto, ShouldResembleProto, b1.Proto)
   230  
   231  			b = &model.Build{ID: b2.ID}
   232  			So(datastore.Get(ctx, b), ShouldBeNil)
   233  			So(b.Proto, ShouldResembleProto, b2.Proto)
   234  		})
   235  
   236  		Convey("works w/ a large number of expired builds", func() {
   237  			bs := make([]*model.Build, 128)
   238  			bss := make([]*model.BuildStatus, len(bs))
   239  			createTime := now.Add(-model.BuildMaxCompletionTime - time.Minute)
   240  			for i := 0; i < len(bs); i++ {
   241  				bs[i], bss[i] = newBuildAndStatus(ctx, pb.Status_SCHEDULED, createTime)
   242  			}
   243  			So(datastore.Put(ctx, bs), ShouldBeNil)
   244  			So(datastore.Put(ctx, bss), ShouldBeNil)
   245  			So(TimeoutExpiredBuilds(ctx), ShouldBeNil)
   246  		})
   247  
   248  		Convey("marks old, running builds w/ infra_failure", func() {
   249  			b1, bs1 := newBuildAndStatus(ctx, pb.Status_SCHEDULED, now.Add(-model.BuildMaxCompletionTime-time.Minute))
   250  			b1.LegacyProperties.LeaseProperties.IsLeased = true
   251  			b2, bs2 := newBuildAndStatus(ctx, pb.Status_STARTED, now.Add(-model.BuildMaxCompletionTime-time.Minute))
   252  			So(datastore.Put(ctx, b1, b2, bs1, bs2), ShouldBeNil)
   253  			So(TimeoutExpiredBuilds(ctx), ShouldBeNil)
   254  
   255  			b := &model.Build{ID: b1.ID}
   256  			bs := &model.BuildStatus{Build: datastore.KeyForObj(ctx, b)}
   257  			So(datastore.Get(ctx, b, bs), ShouldBeNil)
   258  			So(b.Proto.Status, ShouldEqual, pb.Status_INFRA_FAILURE)
   259  			So(b.LegacyProperties.LeaseProperties.IsLeased, ShouldBeFalse)
   260  			So(b.Proto.StatusDetails.GetTimeout(), ShouldNotBeNil)
   261  			So(bs.Status, ShouldEqual, pb.Status_INFRA_FAILURE)
   262  
   263  			b = &model.Build{ID: b2.ID}
   264  			bs = &model.BuildStatus{Build: datastore.KeyForObj(ctx, b)}
   265  			So(datastore.Get(ctx, b, bs), ShouldBeNil)
   266  			So(b.Proto.Status, ShouldEqual, pb.Status_INFRA_FAILURE)
   267  			So(b.Proto.StatusDetails.GetTimeout(), ShouldNotBeNil)
   268  			So(bs.Status, ShouldEqual, pb.Status_INFRA_FAILURE)
   269  
   270  			Convey("reports metrics", func() {
   271  				fv := []any{
   272  					"INFRA_FAILURE", /* metric:status */
   273  					"None",          /* metric:experiments */
   274  				}
   275  				So(store.Get(ctx, metrics.V2.BuildCountCompleted, time.Time{}, fv), ShouldEqual, 2)
   276  			})
   277  
   278  			Convey("adds TQ tasks", func() {
   279  				// TQ tasks for pubsub-notification, bq-export, and invocation-finalization.
   280  				tasks := sch.Tasks()
   281  				notifyIDs := []int64{}
   282  				bqIDs := []int64{}
   283  				rdbIDs := []int64{}
   284  				expected := []int64{b1.ID, b2.ID}
   285  				notifyGoIDs := []int64{}
   286  
   287  				for _, task := range tasks {
   288  					switch v := task.Payload.(type) {
   289  					case *taskdefs.NotifyPubSub:
   290  						notifyIDs = append(notifyIDs, v.GetBuildId())
   291  					case *taskdefs.ExportBigQuery:
   292  						bqIDs = append(bqIDs, v.GetBuildId())
   293  					case *taskdefs.FinalizeResultDBGo:
   294  						rdbIDs = append(rdbIDs, v.GetBuildId())
   295  					case *taskdefs.NotifyPubSubGoProxy:
   296  						notifyGoIDs = append(notifyGoIDs, v.GetBuildId())
   297  
   298  					default:
   299  						panic("invalid task payload")
   300  					}
   301  				}
   302  
   303  				sortIDs := func(ids []int64) {
   304  					sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] })
   305  				}
   306  				sortIDs(notifyIDs)
   307  				sortIDs(bqIDs)
   308  				sortIDs(rdbIDs)
   309  				sortIDs(expected)
   310  				sortIDs(notifyGoIDs)
   311  
   312  				So(notifyIDs, ShouldHaveLength, 2)
   313  				So(notifyIDs, ShouldResemble, expected)
   314  				So(bqIDs, ShouldHaveLength, 2)
   315  				So(bqIDs, ShouldResemble, expected)
   316  				So(rdbIDs, ShouldHaveLength, 2)
   317  				So(rdbIDs, ShouldResemble, expected)
   318  				So(notifyGoIDs, ShouldHaveLength, 2)
   319  				So(notifyGoIDs, ShouldResemble, expected)
   320  			})
   321  		})
   322  	})
   323  }