go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/gerrit/gobmap/map.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gobmap 16 17 import ( 18 "context" 19 "time" 20 21 "go.chromium.org/luci/common/clock" 22 "go.chromium.org/luci/common/data/stringset" 23 "go.chromium.org/luci/common/errors" 24 "go.chromium.org/luci/common/logging" 25 "go.chromium.org/luci/common/retry/transient" 26 "go.chromium.org/luci/gae/service/datastore" 27 "go.chromium.org/luci/server/tq" 28 29 "go.chromium.org/luci/cv/internal/changelist" 30 "go.chromium.org/luci/cv/internal/common/lease" 31 "go.chromium.org/luci/cv/internal/configs/prjcfg" 32 "go.chromium.org/luci/cv/internal/gerrit/cfgmatcher" 33 ) 34 35 const ( 36 mapKind = "MapPart" 37 parentKind = "MapPartParent" 38 maxUpdateDuration = 60 * time.Second 39 ) 40 41 // mapPart contains config groups for a particular LUCI project and host/repo 42 // combination. 43 // 44 // MapPart entities are stored with a parent key of the form (MapPartParent, 45 // host/repo), so that all mapPart entities with a particular host/repo can be 46 // fetched with an ancestor query; the goal is to have fast reads by host/repo. 47 // 48 // The MapPart entities as a whole store a mapping used to lookup which host, 49 // repo and ref maps to which config group; the map is updated when a project 50 // config is updated. 51 type mapPart struct { 52 // TODO(tandrii): s/MapPart/gobmap.MapPart, since "MapPart" is too generic. 53 _kind string `gae:"$kind,MapPart"` 54 55 // The ID of this MapPart, which is the LUCI project name. 56 ID string `gae:"$id"` 57 58 // LUCI project name. 59 // 60 // This field contains the same value as ID, and is included so 61 // that we can index on it, and thus filter on it in queries. 62 Project string 63 64 // MapPartParent key. This parent has an ID of the form "host/repo". 65 Parent *datastore.Key `gae:"$parent"` 66 67 // Groups keeps config groups of a LUCI project applicable to this 68 // host/repo. 69 Groups *cfgmatcher.Groups 70 71 // ConfigHash is the hash of latest CV project config imported from LUCI 72 // Config; this is updated based on ProjectConfig entity. 73 ConfigHash string `gae:",noindex"` 74 } 75 76 // Update updates the gob map entities according to the given project config. 77 // 78 // This may include adding, removing and modifying entities, which is not done 79 // atomically. 80 // Changes to individual Gerrit repos are atomic. This means that 81 // IF Update() is in progress from config versions 1 to 2, identified by 82 // 83 // hashes h1 and h2, respectively, 84 // 85 // AND both h1 and h2 watch specific Gerrit repo, possibly among many others, 86 // THEN a concurrent Lookup(host,repo,...) is guaranteed to return either 87 // based on @h1 or based on @h2. 88 // 89 // However, there is no atomicity across entire project config. This means that 90 // IF Update() is in progress from config versions 1 to 2, identified by 91 // 92 // hashes h1 and h2, respectively, 93 // 94 // THEN two sequential calls to Lookup with different Gerrit repos may return 95 // results based on @h2 at first and @h1 for the second, ie: 96 // 97 // Lookup(host1,repoA,...) -> @h2 98 // Lookup(host1,repoB,...) -> @h1 99 // 100 // Thus, a failed Update() may leave gobmap in a corrupted state, whereby some 101 // Gerrit repos may represent old and some new config versions. In such a 102 // case it's important that Update() caller retries as soon as possible. 103 // 104 // TODO(crbug.com/1179286): make Update() incorruptible. 105 // See TestGobMapConcurrentUpdates which reproduces corruption. 106 // 107 // Update is idempotent. 108 func Update(ctx context.Context, meta *prjcfg.Meta, cgs []*prjcfg.ConfigGroup) error { 109 ctx, cleanup, err := leaseExclusive(ctx, meta) 110 if err != nil { 111 return err 112 } 113 defer cleanup() 114 return update(ctx, meta, cgs) 115 } 116 117 // getAll returns the gob map entities matching given host and repo. 118 func getAll(ctx context.Context, host, repo string) ([]*mapPart, error) { 119 hostRepo := host + "/" + repo 120 parentKey := datastore.MakeKey(ctx, parentKind, hostRepo) 121 q := datastore.NewQuery(mapKind).Ancestor(parentKey) 122 mps := []*mapPart{} 123 if err := datastore.GetAll(ctx, q, &mps); err != nil { 124 return nil, errors.Annotate(err, hostRepo).Tag(transient.Tag).Err() 125 } 126 return mps, nil 127 } 128 129 // leaseExclusive obtains exclusive lease on the gob map for the LUCI project. 130 // 131 // Returns time-limited context for the duration of the lease and a cleanup 132 // function. 133 func leaseExclusive(ctx context.Context, meta *prjcfg.Meta) (context.Context, func(), error) { 134 taskID := "unknown" 135 if info := tq.TaskExecutionInfo(ctx); info != nil { 136 taskID = info.TaskID 137 } 138 l, err := lease.Apply(ctx, lease.Application{ 139 ResourceID: lease.ResourceID("gobmap/" + meta.Project), 140 ExpireTime: clock.Now(ctx).Add(maxUpdateDuration), 141 Holder: taskID, // Used for debugging, only. 142 }) 143 if err != nil { 144 if _, ok := lease.IsAlreadyInLeaseErr(err); ok { 145 return nil, nil, errors.Annotate(err, "gobmap for %s is already being updated", meta.Project).Tag(transient.Tag).Err() 146 } 147 return nil, nil, err 148 } 149 limitedCtx, cancel := clock.WithDeadline(ctx, l.ExpireTime) 150 cleanup := func() { 151 cancel() 152 if err := l.Terminate(ctx); err != nil { 153 // Best-effort termination since lease will expire naturally. 154 logging.Warningf(ctx, "failed to cancel gobmap Update lease: %s", err) 155 } 156 } 157 return limitedCtx, cleanup, nil 158 } 159 160 // update updates gob map Datastore entities. 161 func update(ctx context.Context, meta *prjcfg.Meta, cgs []*prjcfg.ConfigGroup) error { 162 var toPut, toDelete []*mapPart 163 164 // Fetch stored GWM entities. 165 mps := []*mapPart{} 166 q := datastore.NewQuery(mapKind).Eq("Project", meta.Project) 167 if err := datastore.GetAll(ctx, q, &mps); err != nil { 168 return errors.Annotate(err, "failed to get MapPart entities for project %q", meta.Project).Tag(transient.Tag).Err() 169 } 170 171 if meta.Status != prjcfg.StatusEnabled { 172 // The project was disabled or removed, delete everything. 173 toDelete = mps 174 } else { 175 toPut, toDelete = listUpdates(ctx, mps, cgs, meta.Hash(), meta.Project) 176 } 177 178 if err := datastore.Delete(ctx, toDelete); err != nil { 179 return errors.Annotate(err, "failed to delete %d MapPart entities when updating project %q", 180 len(toDelete), meta.Project).Tag(transient.Tag).Err() 181 } 182 if err := datastore.Put(ctx, toPut); err != nil { 183 return errors.Annotate(err, "failed to put %d MapPart entities when updating project %q", 184 len(toPut), meta.Project).Tag(transient.Tag).Err() 185 } 186 return nil 187 } 188 189 // listUpdates determines what needs to be updated. 190 // 191 // It computes which of the existing MapPart entities should be 192 // removed, and which MapPart entities should be put (added or updated). 193 func listUpdates(ctx context.Context, mps []*mapPart, latestConfigGroups []*prjcfg.ConfigGroup, 194 latestHash, project string) (toPut, toDelete []*mapPart) { 195 // Make a map of host/repo to config hashes for currently 196 // existing MapPart entities; used below. 197 existingHashes := make(map[string]string, len(mps)) 198 for _, mp := range mps { 199 hostRepo := mp.Parent.StringID() 200 existingHashes[hostRepo] = mp.ConfigHash 201 } 202 203 // List `internal.Groups` present in the latest config groups. 204 hostRepoToGroups := internalGroups(latestConfigGroups) 205 206 // List MapParts to put; these are those either have 207 // no existing hash yet or a different existing hash. 208 for hostRepo, groups := range hostRepoToGroups { 209 if existingHashes[hostRepo] == latestHash { 210 // Already up to date. 211 continue 212 } 213 mp := &mapPart{ 214 ID: project, 215 Project: project, 216 Parent: datastore.MakeKey(ctx, parentKind, hostRepo), 217 Groups: groups, 218 ConfigHash: latestHash, 219 } 220 toPut = append(toPut, mp) 221 } 222 223 // List MapParts to delete; these are those that currently exist but 224 // have no groups in the latest config. 225 toDelete = []*mapPart{} 226 for _, mp := range mps { 227 hostRepo := mp.Parent.StringID() 228 if _, ok := hostRepoToGroups[hostRepo]; !ok { 229 toDelete = append(toDelete, mp) 230 } 231 } 232 233 return toPut, toDelete 234 } 235 236 // internalGroups converts config.ConfigGroups to cfgmatcher.Groups. 237 // 238 // It returns a map of host/repo to cfgmatcher.Groups. 239 func internalGroups(configGroups []*prjcfg.ConfigGroup) map[string]*cfgmatcher.Groups { 240 ret := make(map[string]*cfgmatcher.Groups) 241 for _, g := range configGroups { 242 for _, gerrit := range g.Content.Gerrit { 243 host := prjcfg.GerritHost(gerrit) 244 for _, p := range gerrit.Projects { 245 hostRepo := host + "/" + p.Name 246 group := cfgmatcher.MakeGroup(g, p) 247 if groups, ok := ret[hostRepo]; ok { 248 groups.Groups = append(groups.Groups, group) 249 } else { 250 ret[hostRepo] = &cfgmatcher.Groups{Groups: []*cfgmatcher.Group{group}} 251 } 252 } 253 } 254 } 255 return ret 256 } 257 258 // Lookup returns config group IDs which watch the given combination of Gerrit 259 // host, repo and ref. 260 // 261 // For example: the input might be ("chromium-review.googlesource.com", 262 // "chromium/src", "refs/heads/main"), and the output might be 0 or 1 or 263 // multiple IDs which can be used to fetch config groups. 264 // 265 // Due to the ref_regexp[_exclude] options, CV can't ensure that each possible 266 // combination is watched by at most one ConfigGroup, which is why this may 267 // return multiple ConfigGroupIDs even for the same LUCI project. 268 // 269 // Always returns non-nil object, even if there are no watching projects. 270 func Lookup(ctx context.Context, host, repo, ref string) (*changelist.ApplicableConfig, error) { 271 // Fetch all MapPart entities for the given host and repo. 272 mps, err := getAll(ctx, host, repo) 273 if err != nil { 274 return nil, errors.Annotate(err, "failed to fetch MapParts").Err() 275 } 276 277 // For each MapPart entity, inspect the Groups to determine which configs 278 // apply for the given ref. 279 ac := &changelist.ApplicableConfig{} 280 for _, mp := range mps { 281 if groups := mp.Groups.Match(ref); len(groups) != 0 { 282 ids := make([]string, len(groups)) 283 for i, g := range groups { 284 ids[i] = g.GetId() 285 } 286 ac.Projects = append(ac.Projects, &changelist.ApplicableConfig_Project{ 287 Name: mp.Project, 288 ConfigGroupIds: ids, 289 }) 290 } 291 } 292 return ac, nil 293 } 294 295 // LookupProjects returns all the LUCI projects that have at least one 296 // applicable config for a given host and repo. 297 // 298 // Returns a sorted slice with a unique set of the LUCI projects. 299 func LookupProjects(ctx context.Context, host, repo string) ([]string, error) { 300 // Fetch all MapPart entities for the given host and repo. 301 mps, err := getAll(ctx, host, repo) 302 if err != nil { 303 return nil, errors.Annotate(err, "failed to fetch MapParts").Err() 304 } 305 prjs := stringset.New(len(mps)) 306 for _, mp := range mps { 307 prjs.Add(mp.Project) 308 } 309 return prjs.ToSortedSlice(), nil 310 }