go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/milo/internal/git/combined_logs.go (about) 1 // Copyright 2018 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package git 16 17 import ( 18 "container/heap" 19 "context" 20 "fmt" 21 "sync" 22 23 "github.com/golang/protobuf/proto" 24 25 "google.golang.org/grpc/codes" 26 "google.golang.org/grpc/status" 27 28 gitilesapi "go.chromium.org/luci/common/api/gitiles" 29 "go.chromium.org/luci/common/errors" 30 "go.chromium.org/luci/common/logging" 31 gitpb "go.chromium.org/luci/common/proto/git" 32 "go.chromium.org/luci/common/sync/parallel" 33 "go.chromium.org/luci/gae/service/datastore" 34 "go.chromium.org/luci/milo/internal/utils" 35 ) 36 37 // A structure to keep a list of commits for some ref. 38 type refCommits struct { 39 commits []*gitpb.Commit 40 } 41 42 // The pop method removes and returns first commit. Second return value is true 43 // if this was the last commit. Caller must ensure refCommits has commits when 44 // calling the method. 45 func (rc *refCommits) pop() (commit *gitpb.Commit, empty bool) { 46 commit, rc.commits = rc.commits[0], rc.commits[1:] 47 return commit, len(rc.commits) == 0 48 } 49 50 // We use commitHeap to merge slices of commits using max-heap algorithm below. 51 // Only first commit in each slice is used for comparisons. 52 type commitHeap []refCommits 53 54 func (h commitHeap) Len() int { 55 return len(h) 56 } 57 58 func (h commitHeap) Swap(i, j int) { 59 h[i], h[j] = h[j], h[i] 60 } 61 62 func (h commitHeap) Less(i, j int) bool { 63 iTime := h[i].commits[0].Committer.Time.AsTime() 64 jTime := h[j].commits[0].Committer.Time.AsTime() 65 66 // Ensure consistent ordering based on commit hash when times are identical. 67 if iTime == jTime { 68 return h[i].commits[0].Id > h[j].commits[0].Id 69 } 70 71 // To make heap behave as max-heap, we consider later time to be smaller than 72 // earlier timer, i.e. latest commit will be the at the root of the heap. 73 return iTime.After(jTime) 74 } 75 76 func (h *commitHeap) Push(x any) { 77 *h = append(*h, x.(refCommits)) 78 } 79 80 func (h *commitHeap) Pop() any { 81 old := *h 82 n := len(old) 83 x := old[n-1] 84 *h = old[0 : n-1] 85 return x 86 } 87 88 // logCache stores a cached list of commits (log) for a given ref at a given 89 // commit position return by Gerrit. The Key describes the query that was used 90 // to retrieve the log and follows the following format: 91 // 92 // host|project|ref|exclude_ref|limit 93 // 94 // When the ref moves, entity is updated with the new CommitID and updated Log. 95 // The Log field is an encoded list of commits, which is a created by encoding a 96 // varint for the number of commits in the list followed by the corresponding 97 // number of serialized gitpb.Commit messages. 98 type logCache struct { 99 Key string `gae:"$id"` 100 CommitID string `gae:"commit,noindex"` 101 Log []byte `gae:"log,noindex"` 102 103 ref string `gae:"-"` 104 } 105 106 func logCacheFor(host, project, ref, excludeRef string, limit int) logCache { 107 return logCache{ 108 Key: fmt.Sprintf("%s|%s|%s|%s|%d", host, project, ref, excludeRef, limit), 109 ref: ref, 110 } 111 } 112 113 func loadCacheFromDS(c context.Context, host, project, excludeRef string, limit int, refTips map[string]string) (cachedLogs map[string][]*gitpb.Commit) { 114 items := make([]logCache, 0, len(refTips)) 115 for ref := range refTips { 116 items = append(items, logCacheFor(host, project, ref, excludeRef, limit)) 117 } 118 119 cachedLogs = map[string][]*gitpb.Commit{} 120 var merr errors.MultiError 121 switch err := datastore.Get(c, items).(type) { 122 case errors.MultiError: 123 merr = err 124 case nil: 125 merr = nil 126 default: 127 return 128 } 129 130 for i, item := range items { 131 if (merr != nil && merr[i] != nil) || item.CommitID != refTips[item.ref] { 132 continue 133 } 134 135 buf := proto.NewBuffer(item.Log) 136 numCommits, err := buf.DecodeVarint() 137 if err != nil { 138 continue 139 } 140 141 log := make([]*gitpb.Commit, 0, numCommits) 142 for j := uint64(0); j < numCommits; j++ { 143 var commit gitpb.Commit 144 if err = buf.DecodeMessage(&commit); err != nil { 145 continue 146 } 147 148 log = append(log, &commit) 149 } 150 151 cachedLogs[item.ref] = log 152 } 153 154 return 155 } 156 157 func saveCacheToDS(c context.Context, host, project, excludeRef string, limit int, refLogs map[string][]*gitpb.Commit, refTips map[string]string) error { 158 items := make([]logCache, 0, len(refLogs)) 159 totalBytes := 0 160 for ref, log := range refLogs { 161 buf := proto.NewBuffer([]byte{}) 162 if err := buf.EncodeVarint(uint64(len(log))); err != nil { 163 return err 164 } 165 166 for _, commit := range log { 167 if err := buf.EncodeMessage(commit); err != nil { 168 return err 169 } 170 } 171 172 item := logCacheFor(host, project, ref, excludeRef, limit) 173 item.CommitID = refTips[ref] 174 item.Log = buf.Bytes() 175 items = append(items, item) 176 177 // This logic breaks storing caches into datastore into smaller requests to 178 // avoid exceeding 1MB limit on datastore requests set by AppEngine. 179 totalBytes += len(item.Log) 180 if totalBytes > 512*1024 { // 0.5 MiB 181 if err := datastore.Put(c, items); err != nil { 182 return err 183 } 184 totalBytes = 0 185 items = items[:0] 186 } 187 } 188 189 return datastore.Put(c, items) 190 } 191 192 // maxGitilesLogRPCsPerRequest is the max number of Gitiles requests allowed per 193 // user request to avoid exceeding Gitiles quota. 194 const maxGitilesLogRPCsPerRequest = 50 195 196 func (impl *implementation) loadLogsForRefs(c context.Context, host, project, excludeRef string, limit int, refTips map[string]string) ([][]*gitpb.Commit, error) { 197 cachedLogs := loadCacheFromDS(c, host, project, excludeRef, limit, refTips) 198 logging.Infof(c, "Fetched %d logs from cache, will fetch remaining %d logs from Gitiles", len(cachedLogs), len(refTips)-len(cachedLogs)) 199 200 // Load missing logs from Gitiles. 201 newLogs := make(map[string][]*gitpb.Commit) 202 lock := sync.Mutex{} // for concurrent writes to the map 203 err := parallel.WorkPool(8, func(ch chan<- func() error) { 204 numRequests := 0 205 for ref := range refTips { 206 if _, ok := cachedLogs[ref]; ok { 207 continue 208 } 209 210 if numRequests++; numRequests > maxGitilesLogRPCsPerRequest { 211 ch <- func() error { 212 // TODO(sergiyb,tandrii): if you have genuine need for this many refs 213 // at once, implement a cron job that runs this very function 214 // continuously to avoid bursts of gitiles traffic that will make Milo 215 // not functional for the other projects. 216 return errors.Reason("too many refs are new or changed to be "+ 217 "fetched at once, stopping after %d. Check your config and/or "+ 218 "reload the page", maxGitilesLogRPCsPerRequest).Err() 219 } 220 break 221 } 222 223 ref := ref 224 ch <- func() error { 225 log, err := impl.log(c, host, project, refTips[ref], excludeRef, &LogOptions{Limit: limit}) 226 if err != nil { 227 return err 228 } 229 230 lock.Lock() 231 defer lock.Unlock() 232 newLogs[ref] = log 233 return nil 234 } 235 } 236 }) 237 238 // Try to cache what we've fetched even if some requests failed. 239 if derr := saveCacheToDS(c, host, project, excludeRef, limit, newLogs, refTips); derr != nil { 240 logging.WithError(derr).Warningf(c, "Failed to cache logs fetched from Gitiles") 241 } 242 243 if err != nil { 244 return nil, errors.Annotate(err, "failed to fetch %d logs from Gitiles", len(refTips)-len(cachedLogs)-len(newLogs)).Err() 245 } 246 247 // Drop ref names and create a list containing all logs. 248 logs := make([][]*gitpb.Commit, 0, len(cachedLogs)+len(newLogs)) 249 for _, log := range cachedLogs { 250 logs = append(logs, log) 251 } 252 for _, log := range newLogs { 253 logs = append(logs, log) 254 } 255 256 return logs, nil 257 } 258 259 // CombinedLogs implements Client interface. 260 func (impl *implementation) CombinedLogs(c context.Context, host, project, excludeRef string, refs []string, limit int) (commits []*gitpb.Commit, err error) { 261 defer func() { err = errors.Annotate(utils.TagGRPC(c, err), "gitiles.CombinedLogs").Err() }() 262 263 // Check if the user is allowed to access this project. 264 allowed, err := impl.acls.IsAllowed(c, host, project) 265 switch { 266 case err != nil: 267 return 268 case !allowed: 269 err = status.Errorf(codes.NotFound, "not found") 270 return 271 } 272 273 // Prepare Gitiles client. 274 client, err := impl.gitilesClient(c, host) 275 if err != nil { 276 return 277 } 278 279 // Resolve all refs and commits they are pointing at. 280 refTips, missingRefs, err := gitilesapi.NewRefSet(refs).Resolve(c, client, project) 281 if err != nil { 282 return 283 } 284 if len(missingRefs) > 0 { 285 logging.Warningf(c, "configured refs %s weren't resolved to any ref; either incorrect ACLs or redudant refs", missingRefs) 286 } 287 288 var logs [][]*gitpb.Commit 289 if logs, err = impl.loadLogsForRefs(c, host, project, excludeRef, limit, refTips); err != nil { 290 return 291 } 292 293 // We merge commits from all refs sorted by time into a single list up to a 294 // limit. We use max-heap based merging algorithm below. 295 var h commitHeap 296 for _, log := range logs { 297 if len(log) > 0 { 298 h = append(h, refCommits{log}) 299 } 300 } 301 302 // Keep adding commits to the merged list until we reach the limit or run out 303 // of commits on all refs. 304 heap.Init(&h) 305 commits = make([]*gitpb.Commit, 0, limit) 306 for len(commits) < limit && len(h) != 0 { 307 commit, empty := h[0].pop() 308 // Do not add duplicate commits that come from different refs. 309 if len(commits) == 0 || commits[len(commits)-1].Id != commit.Id { 310 commits = append(commits, commit) 311 } 312 if empty { 313 heap.Remove(&h, 0) 314 } else { 315 heap.Fix(&h, 0) 316 } 317 } 318 319 return 320 }