go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/prjmanager/manager/manager_test.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package manager
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sort"
    21  	"sync"
    22  	"testing"
    23  	"time"
    24  
    25  	"go.chromium.org/luci/common/errors"
    26  	"go.chromium.org/luci/gae/service/datastore"
    27  	"go.chromium.org/luci/server/tq/tqtesting"
    28  	"google.golang.org/protobuf/proto"
    29  	"google.golang.org/protobuf/types/known/durationpb"
    30  	"google.golang.org/protobuf/types/known/timestamppb"
    31  
    32  	cfgpb "go.chromium.org/luci/cv/api/config/v2"
    33  	"go.chromium.org/luci/cv/internal/changelist"
    34  	"go.chromium.org/luci/cv/internal/common"
    35  	"go.chromium.org/luci/cv/internal/common/eventbox"
    36  	"go.chromium.org/luci/cv/internal/configs/prjcfg/prjcfgtest"
    37  	"go.chromium.org/luci/cv/internal/cvtesting"
    38  	gf "go.chromium.org/luci/cv/internal/gerrit/gerritfake"
    39  	"go.chromium.org/luci/cv/internal/gerrit/gobmap/gobmaptest"
    40  	"go.chromium.org/luci/cv/internal/gerrit/poller"
    41  	gerritupdater "go.chromium.org/luci/cv/internal/gerrit/updater"
    42  	"go.chromium.org/luci/cv/internal/prjmanager"
    43  	"go.chromium.org/luci/cv/internal/prjmanager/pmtest"
    44  	"go.chromium.org/luci/cv/internal/prjmanager/prjpb"
    45  	"go.chromium.org/luci/cv/internal/run"
    46  
    47  	. "github.com/smartystreets/goconvey/convey"
    48  
    49  	. "go.chromium.org/luci/common/testing/assertions"
    50  )
    51  
    52  func TestProjectTQLateTasks(t *testing.T) {
    53  	t.Parallel()
    54  
    55  	Convey("PM task does nothing if it comes too late", t, func() {
    56  		ct := cvtesting.Test{}
    57  		ctx, cancel := ct.SetUp(t)
    58  		defer cancel()
    59  
    60  		pmNotifier := prjmanager.NewNotifier(ct.TQDispatcher)
    61  		runNotifier := runNotifierMock{}
    62  		clMutator := changelist.NewMutator(ct.TQDispatcher, pmNotifier, &runNotifier, &tjMock{})
    63  		clUpdater := changelist.NewUpdater(ct.TQDispatcher, clMutator)
    64  		gerritupdater.RegisterUpdater(clUpdater, ct.GFactory())
    65  		_ = New(pmNotifier, &runNotifier, clMutator, ct.GFactory(), clUpdater)
    66  
    67  		const lProject = "infra"
    68  		recipient := prjmanager.EventboxRecipient(ctx, lProject)
    69  
    70  		prjcfgtest.Create(ctx, lProject, singleRepoConfig("host", "repo"))
    71  
    72  		So(pmNotifier.UpdateConfig(ctx, lProject), ShouldBeNil)
    73  		So(pmtest.Projects(ct.TQ.Tasks()), ShouldResemble, []string{lProject})
    74  		events1, err := eventbox.List(ctx, recipient)
    75  		So(err, ShouldBeNil)
    76  
    77  		// Simulate stuck TQ task, which gets executed with a huge delay.
    78  		ct.Clock.Add(time.Hour)
    79  		ct.TQ.Run(ctx, tqtesting.StopAfterTask(prjpb.ManageProjectTaskClass))
    80  		// It must not modify PM state nor consume events.
    81  		So(datastore.Get(ctx, &prjmanager.Project{ID: lProject}), ShouldEqual, datastore.ErrNoSuchEntity)
    82  		events2, err := eventbox.List(ctx, recipient)
    83  		So(err, ShouldBeNil)
    84  		So(events2, ShouldResemble, events1)
    85  		// But schedules new task instead.
    86  		So(pmtest.Projects(ct.TQ.Tasks()), ShouldResemble, []string{lProject})
    87  
    88  		// Next task coming ~on time proceeds normally.
    89  		ct.TQ.Run(ctx, tqtesting.StopAfterTask(prjpb.ManageProjectTaskClass))
    90  		So(datastore.Get(ctx, &prjmanager.Project{ID: lProject}), ShouldBeNil)
    91  		events3, err := eventbox.List(ctx, recipient)
    92  		So(err, ShouldBeNil)
    93  		So(events3, ShouldBeEmpty)
    94  	})
    95  }
    96  
    97  func TestProjectLifeCycle(t *testing.T) {
    98  	t.Parallel()
    99  
   100  	Convey("Project can be created, updated, deleted", t, func() {
   101  		ct := cvtesting.Test{}
   102  		ctx, cancel := ct.SetUp(t)
   103  		defer cancel()
   104  
   105  		pmNotifier := prjmanager.NewNotifier(ct.TQDispatcher)
   106  		runNotifier := runNotifierMock{}
   107  		clMutator := changelist.NewMutator(ct.TQDispatcher, pmNotifier, &runNotifier, &tjMock{})
   108  		clUpdater := changelist.NewUpdater(ct.TQDispatcher, clMutator)
   109  		gerritupdater.RegisterUpdater(clUpdater, ct.GFactory())
   110  		_ = New(pmNotifier, &runNotifier, clMutator, ct.GFactory(), clUpdater)
   111  
   112  		const lProject = "infra"
   113  		recipient := prjmanager.EventboxRecipient(ctx, lProject)
   114  
   115  		Convey("with new project", func() {
   116  			prjcfgtest.Create(ctx, lProject, singleRepoConfig("host", "repo"))
   117  			So(pmNotifier.UpdateConfig(ctx, lProject), ShouldBeNil)
   118  			// Second event is a noop, but should still be consumed at once.
   119  			So(pmNotifier.UpdateConfig(ctx, lProject), ShouldBeNil)
   120  			So(pmtest.Projects(ct.TQ.Tasks()), ShouldResemble, []string{lProject})
   121  			ct.TQ.Run(ctx, tqtesting.StopAfterTask(prjpb.ManageProjectTaskClass))
   122  			events, err := eventbox.List(ctx, recipient)
   123  			So(err, ShouldBeNil)
   124  			So(events, ShouldHaveLength, 0)
   125  			p, ps, plog := loadProjectEntities(ctx, lProject)
   126  			So(p.EVersion, ShouldEqual, 1)
   127  			So(ps.Status, ShouldEqual, prjpb.Status_STARTED)
   128  			So(plog, ShouldNotBeNil)
   129  			So(poller.FilterProjects(ct.TQ.Tasks().SortByETA().Payloads()), ShouldResemble, []string{lProject})
   130  
   131  			// Ensure first poller task gets executed.
   132  			ct.Clock.Add(time.Hour)
   133  
   134  			Convey("update config with incomplete runs", func() {
   135  				err := datastore.Put(
   136  					ctx,
   137  					&run.Run{ID: common.RunID(lProject + "/111-beef"), CLs: common.CLIDs{111}},
   138  					&run.Run{ID: common.RunID(lProject + "/222-cafe"), CLs: common.CLIDs{222}},
   139  				)
   140  				So(err, ShouldBeNil)
   141  				// This is what pmNotifier.notifyRunCreated func does,
   142  				// but because it's private, it can't be called from this package.
   143  				simulateRunCreated := func(suffix string) {
   144  					e := &prjpb.Event{Event: &prjpb.Event_RunCreated{
   145  						RunCreated: &prjpb.RunCreated{
   146  							RunId: lProject + "/" + suffix,
   147  						},
   148  					}}
   149  					value, err := proto.Marshal(e)
   150  					So(err, ShouldBeNil)
   151  					So(eventbox.Emit(ctx, value, recipient), ShouldBeNil)
   152  				}
   153  				simulateRunCreated("111-beef")
   154  				simulateRunCreated("222-cafe")
   155  
   156  				prjcfgtest.Update(ctx, lProject, singleRepoConfig("host", "repo2"))
   157  				So(pmNotifier.UpdateConfig(ctx, lProject), ShouldBeNil)
   158  
   159  				ct.TQ.Run(ctx, tqtesting.StopAfterTask(prjpb.ManageProjectTaskClass))
   160  
   161  				p, _, plog = loadProjectEntities(ctx, lProject)
   162  				So(p.IncompleteRuns(), ShouldEqual, common.MakeRunIDs(lProject+"/111-beef", lProject+"/222-cafe"))
   163  				So(plog, ShouldNotBeNil)
   164  				So(runNotifier.popUpdateConfig(), ShouldResemble, p.IncompleteRuns())
   165  
   166  				Convey("disable project with incomplete runs", func() {
   167  					prjcfgtest.Disable(ctx, lProject)
   168  					So(pmNotifier.UpdateConfig(ctx, lProject), ShouldBeNil)
   169  					ct.TQ.Run(ctx, tqtesting.StopAfterTask(prjpb.ManageProjectTaskClass))
   170  
   171  					p, ps, plog := loadProjectEntities(ctx, lProject)
   172  					So(p.EVersion, ShouldEqual, 3)
   173  					So(ps.Status, ShouldEqual, prjpb.Status_STOPPING)
   174  					So(plog, ShouldNotBeNil)
   175  					So(poller.FilterProjects(ct.TQ.Tasks().SortByETA().Payloads()), ShouldResemble, []string{lProject})
   176  					// Should ask Runs to cancel themselves.
   177  					reqs := make([]cancellationRequest, len(p.IncompleteRuns()))
   178  					for i, runID := range p.IncompleteRuns() {
   179  						reqs[i] = cancellationRequest{
   180  							id:     runID,
   181  							reason: fmt.Sprintf("CV is disabled for LUCI Project %q", lProject),
   182  						}
   183  					}
   184  					So(runNotifier.popCancel(), ShouldResemble, reqs)
   185  
   186  					Convey("wait for all IncompleteRuns to finish", func() {
   187  						So(pmNotifier.NotifyRunFinished(ctx, common.RunID(lProject+"/111-beef"), run.Status_CANCELLED), ShouldBeNil)
   188  						ct.TQ.Run(ctx, tqtesting.StopAfterTask(prjpb.ManageProjectTaskClass))
   189  
   190  						p, ps, plog := loadProjectEntities(ctx, lProject)
   191  						So(ps.Status, ShouldEqual, prjpb.Status_STOPPING)
   192  						So(p.IncompleteRuns(), ShouldResemble, common.MakeRunIDs(lProject+"/222-cafe"))
   193  						So(plog, ShouldBeNil) // still STOPPING.
   194  
   195  						So(pmNotifier.NotifyRunFinished(ctx, common.RunID(lProject+"/222-cafe"), run.Status_CANCELLED), ShouldBeNil)
   196  						ct.TQ.Run(ctx, tqtesting.StopAfterTask(prjpb.ManageProjectTaskClass))
   197  
   198  						p, ps, plog = loadProjectEntities(ctx, lProject)
   199  						So(ps.Status, ShouldEqual, prjpb.Status_STOPPED)
   200  						So(p.IncompleteRuns(), ShouldBeEmpty)
   201  						So(plog, ShouldNotBeNil)
   202  					})
   203  				})
   204  			})
   205  
   206  			Convey("delete project without incomplete runs", func() {
   207  				// No components means also no runs.
   208  				p.State.Components = nil
   209  				prjcfgtest.Delete(ctx, lProject)
   210  				So(pmNotifier.UpdateConfig(ctx, lProject), ShouldBeNil)
   211  				ct.TQ.Run(ctx, tqtesting.StopAfterTask(prjpb.ManageProjectTaskClass))
   212  
   213  				p, ps, plog := loadProjectEntities(ctx, lProject)
   214  				So(p.EVersion, ShouldEqual, 2)
   215  				So(ps.Status, ShouldEqual, prjpb.Status_STOPPED)
   216  				So(plog, ShouldNotBeNil)
   217  				So(poller.FilterProjects(ct.TQ.Tasks().SortByETA().Payloads()), ShouldResemble, []string{lProject})
   218  			})
   219  		})
   220  	})
   221  }
   222  
   223  func TestProjectHandlesManyEvents(t *testing.T) {
   224  	t.Parallel()
   225  
   226  	Convey("PM handles many events", t, func() {
   227  		ct := cvtesting.Test{}
   228  		ctx, cancel := ct.SetUp(t)
   229  		defer cancel()
   230  
   231  		const lProject = "infra"
   232  		const gHost = "host"
   233  		const gRepo = "repo"
   234  
   235  		recipient := prjmanager.EventboxRecipient(ctx, lProject)
   236  		pmNotifier := prjmanager.NewNotifier(ct.TQDispatcher)
   237  		runNotifier := runNotifierMock{}
   238  		clMutator := changelist.NewMutator(ct.TQDispatcher, pmNotifier, &runNotifier, &tjMock{})
   239  		clUpdater := changelist.NewUpdater(ct.TQDispatcher, clMutator)
   240  		gerritupdater.RegisterUpdater(clUpdater, ct.GFactory())
   241  		pm := New(pmNotifier, &runNotifier, clMutator, ct.GFactory(), clUpdater)
   242  
   243  		cfg := singleRepoConfig(gHost, gRepo)
   244  		cfg.ConfigGroups[0].CombineCls = &cfgpb.CombineCLs{
   245  			// Postpone creation of Runs, which isn't important in this test.
   246  			StabilizationDelay: durationpb.New(time.Hour),
   247  		}
   248  		prjcfgtest.Create(ctx, lProject, cfg)
   249  		gobmaptest.Update(ctx, lProject)
   250  
   251  		// Put #43 CL directly w/o notifying the PM.
   252  		cl43 := changelist.MustGobID(gHost, 43).MustCreateIfNotExists(ctx)
   253  		cl43.Snapshot = &changelist.Snapshot{
   254  			ExternalUpdateTime:    timestamppb.New(ct.Clock.Now()),
   255  			LuciProject:           lProject,
   256  			MinEquivalentPatchset: 1,
   257  			Patchset:              1,
   258  			Kind: &changelist.Snapshot_Gerrit{Gerrit: &changelist.Gerrit{
   259  				Host: gHost,
   260  				Info: gf.CI(43,
   261  					gf.Project(gRepo), gf.Ref("refs/heads/main"),
   262  					gf.CQ(+2, ct.Clock.Now(), gf.U("user-1"))),
   263  			}},
   264  		}
   265  		meta := prjcfgtest.MustExist(ctx, lProject)
   266  		cl43.ApplicableConfig = &changelist.ApplicableConfig{
   267  			Projects: []*changelist.ApplicableConfig_Project{
   268  				{Name: lProject, ConfigGroupIds: []string{string(meta.ConfigGroupIDs[0])}},
   269  			},
   270  		}
   271  		So(datastore.Put(ctx, cl43), ShouldBeNil)
   272  
   273  		cl44 := changelist.MustGobID(gHost, 44).MustCreateIfNotExists(ctx)
   274  		cl44.Snapshot = &changelist.Snapshot{
   275  			ExternalUpdateTime:    timestamppb.New(ct.Clock.Now()),
   276  			LuciProject:           lProject,
   277  			MinEquivalentPatchset: 1,
   278  			Patchset:              1,
   279  			Kind: &changelist.Snapshot_Gerrit{Gerrit: &changelist.Gerrit{
   280  				Info: gf.CI(
   281  					44, gf.Project(gRepo), gf.Ref("refs/heads/main"),
   282  					gf.CQ(+2, ct.Clock.Now(), gf.U("user-1"))),
   283  			}},
   284  		}
   285  		cl44.ApplicableConfig = &changelist.ApplicableConfig{
   286  			Projects: []*changelist.ApplicableConfig_Project{
   287  				{Name: lProject, ConfigGroupIds: []string{string(meta.ConfigGroupIDs[0])}},
   288  			},
   289  		}
   290  		So(datastore.Put(ctx, cl44), ShouldBeNil)
   291  
   292  		// This event is the only event notifying PM about CL#43.
   293  		So(pmNotifier.NotifyCLsUpdated(ctx, lProject, changelist.ToUpdatedEvents(cl43, cl44)), ShouldBeNil)
   294  
   295  		const n = 20
   296  		for i := 0; i < n; i++ {
   297  			So(pmNotifier.UpdateConfig(ctx, lProject), ShouldBeNil)
   298  			So(pmNotifier.Poke(ctx, lProject), ShouldBeNil)
   299  			// Simulate updating a CL.
   300  			cl44.EVersion++
   301  			So(datastore.Put(ctx, cl44), ShouldBeNil)
   302  			So(pmNotifier.NotifyCLsUpdated(ctx, lProject, changelist.ToUpdatedEvents(cl44)), ShouldBeNil)
   303  		}
   304  
   305  		events, err := eventbox.List(ctx, recipient)
   306  		So(err, ShouldBeNil)
   307  		// Expect the following events:
   308  		// +1 from NotifyCLsUpdated on cl43 and cl44,
   309  		// +3*n from loop.
   310  		So(events, ShouldHaveLength, 3*n+1)
   311  
   312  		// Run `w` concurrent PMs.
   313  		const w = 20
   314  		now := ct.Clock.Now()
   315  		errs := make(errors.MultiError, w)
   316  		wg := sync.WaitGroup{}
   317  		wg.Add(w)
   318  		for i := 0; i < w; i++ {
   319  			i := i
   320  			go func() {
   321  				defer wg.Done()
   322  				errs[i] = pm.manageProject(ctx, lProject, now)
   323  			}()
   324  		}
   325  		wg.Wait()
   326  
   327  		// Exactly 1 of the workers must create PM entity, consume events and
   328  		// poke the poller.
   329  		p := prjmanager.Project{ID: lProject}
   330  		So(datastore.Get(ctx, &p), ShouldBeNil)
   331  		// Both cl43 and cl44 must have corresponding PCLs with latest EVersions.
   332  		So(p.State.GetPcls(), ShouldHaveLength, 2)
   333  		for _, pcl := range p.State.GetPcls() {
   334  			switch common.CLID(pcl.GetClid()) {
   335  			case cl43.ID:
   336  				So(pcl.GetEversion(), ShouldEqual, cl43.EVersion)
   337  			case cl44.ID:
   338  				So(pcl.GetEversion(), ShouldEqual, cl44.EVersion)
   339  			default:
   340  				So("must not happen", ShouldBeTrue)
   341  			}
   342  		}
   343  
   344  		events, err = eventbox.List(ctx, recipient)
   345  		So(err, ShouldBeNil)
   346  		So(events, ShouldBeEmpty)
   347  		So(poller.FilterProjects(ct.TQ.Tasks().SortByETA().Payloads()), ShouldResemble, []string{lProject})
   348  
   349  		// At least 1 worker must finish successfully.
   350  		errCnt, _ := errs.Summary()
   351  		t.Logf("%d/%d workers failed", errCnt, w)
   352  		So(errCnt, ShouldBeLessThan, w)
   353  	})
   354  }
   355  
   356  func loadProjectEntities(ctx context.Context, luciProject string) (
   357  	*prjmanager.Project,
   358  	*prjmanager.ProjectStateOffload,
   359  	*prjmanager.ProjectLog,
   360  ) {
   361  	p := &prjmanager.Project{ID: luciProject}
   362  	switch err := datastore.Get(ctx, p); {
   363  	case err == datastore.ErrNoSuchEntity:
   364  		return nil, nil, nil
   365  	case err != nil:
   366  		panic(err)
   367  	}
   368  
   369  	key := datastore.MakeKey(ctx, prjmanager.ProjectKind, luciProject)
   370  	ps := &prjmanager.ProjectStateOffload{Project: key}
   371  	if err := datastore.Get(ctx, ps); err != nil {
   372  		// ProjectStateOffload must exist if Project exists.
   373  		panic(err)
   374  	}
   375  
   376  	plog := &prjmanager.ProjectLog{
   377  		Project:  datastore.MakeKey(ctx, prjmanager.ProjectKind, luciProject),
   378  		EVersion: p.EVersion,
   379  	}
   380  	switch err := datastore.Get(ctx, plog); {
   381  	case err == datastore.ErrNoSuchEntity:
   382  		return p, ps, nil
   383  	case err != nil:
   384  		panic(err)
   385  	default:
   386  		// Quick check invariant that plog replicates what's stored in Project &
   387  		// ProjectStateOffload entities at the same EVersion.
   388  		So(plog.EVersion, ShouldEqual, p.EVersion)
   389  		So(plog.Status, ShouldEqual, ps.Status)
   390  		So(plog.ConfigHash, ShouldEqual, ps.ConfigHash)
   391  		So(plog.State, ShouldResembleProto, p.State)
   392  		So(plog.Reasons, ShouldNotBeEmpty)
   393  		return p, ps, plog
   394  	}
   395  }
   396  
   397  func singleRepoConfig(gHost string, gRepos ...string) *cfgpb.Config {
   398  	projects := make([]*cfgpb.ConfigGroup_Gerrit_Project, len(gRepos))
   399  	for i, gRepo := range gRepos {
   400  		projects[i] = &cfgpb.ConfigGroup_Gerrit_Project{
   401  			Name:      gRepo,
   402  			RefRegexp: []string{"refs/heads/main"},
   403  		}
   404  	}
   405  	return &cfgpb.Config{
   406  		ConfigGroups: []*cfgpb.ConfigGroup{
   407  			{
   408  				Name: "main",
   409  				Gerrit: []*cfgpb.ConfigGroup_Gerrit{
   410  					{
   411  						Url:      "https://" + gHost + "/",
   412  						Projects: projects,
   413  					},
   414  				},
   415  			},
   416  		},
   417  	}
   418  }
   419  
   420  type runNotifierMock struct {
   421  	m            sync.Mutex
   422  	cancel       []cancellationRequest
   423  	updateConfig common.RunIDs
   424  }
   425  
   426  type cancellationRequest struct {
   427  	id     common.RunID
   428  	reason string
   429  }
   430  
   431  func (r *runNotifierMock) NotifyCLsUpdated(ctx context.Context, rid common.RunID, cls *changelist.CLUpdatedEvents) error {
   432  	panic("not implemented")
   433  }
   434  
   435  func (r *runNotifierMock) Start(ctx context.Context, id common.RunID) error {
   436  	return nil
   437  }
   438  
   439  func (r *runNotifierMock) PokeNow(ctx context.Context, id common.RunID) error {
   440  	panic("not implemented")
   441  }
   442  
   443  func (r *runNotifierMock) Cancel(ctx context.Context, id common.RunID, reason string) error {
   444  	r.m.Lock()
   445  	r.cancel = append(r.cancel, cancellationRequest{id: id, reason: reason})
   446  	r.m.Unlock()
   447  	return nil
   448  }
   449  
   450  func (r *runNotifierMock) UpdateConfig(ctx context.Context, id common.RunID, hash string, eversion int64) error {
   451  	r.m.Lock()
   452  	r.updateConfig = append(r.updateConfig, id)
   453  	r.m.Unlock()
   454  	return nil
   455  }
   456  
   457  func (r *runNotifierMock) popUpdateConfig() common.RunIDs {
   458  	r.m.Lock()
   459  	out := r.updateConfig
   460  	r.updateConfig = nil
   461  	r.m.Unlock()
   462  	sort.Sort(out)
   463  	return out
   464  }
   465  
   466  func (r *runNotifierMock) popCancel() []cancellationRequest {
   467  	r.m.Lock()
   468  	out := r.cancel
   469  	r.cancel = nil
   470  	r.m.Unlock()
   471  	sort.Slice(out, func(i, j int) bool {
   472  		return out[i].id < out[j].id
   473  	})
   474  	return out
   475  }
   476  
   477  type tjMock struct{}
   478  
   479  func (t *tjMock) ScheduleCancelStale(ctx context.Context, clid common.CLID, prevMinEquivalentPatchset, currentMinEquivalentPatchset int32, eta time.Time) error {
   480  	return nil
   481  }