go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/milo/internal/git/log.go (about) 1 // Copyright 2018 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package git 16 17 import ( 18 "context" 19 "encoding/hex" 20 "fmt" 21 "regexp" 22 "time" 23 24 "github.com/gomodule/redigo/redis" 25 "google.golang.org/protobuf/proto" 26 27 "google.golang.org/grpc/codes" 28 "google.golang.org/grpc/status" 29 30 "go.chromium.org/luci/common/clock" 31 "go.chromium.org/luci/common/errors" 32 "go.chromium.org/luci/common/logging" 33 gitpb "go.chromium.org/luci/common/proto/git" 34 gitilespb "go.chromium.org/luci/common/proto/gitiles" 35 "go.chromium.org/luci/common/tsmon/distribution" 36 "go.chromium.org/luci/common/tsmon/field" 37 "go.chromium.org/luci/common/tsmon/metric" 38 "go.chromium.org/luci/common/tsmon/types" 39 "go.chromium.org/luci/milo/internal/utils" 40 "go.chromium.org/luci/server/auth" 41 "go.chromium.org/luci/server/redisconn" 42 ) 43 44 // LogOptions are options for Log function. 45 type LogOptions struct { 46 Limit int 47 WithFiles bool 48 } 49 50 // Log implements Client interface. 51 func (p *implementation) Log(c context.Context, host, project, commitish string, inputOptions *LogOptions) (commits []*gitpb.Commit, err error) { 52 defer func() { err = utils.TagGRPC(c, err) }() 53 54 allowed, err := p.acls.IsAllowed(c, host, project) 55 switch { 56 case err != nil: 57 return 58 case !allowed: 59 logging.Warningf(c, "%q not allowed for host %q project %q according to git ACL configured in milo's settings", auth.CurrentIdentity(c), host, project) 60 err = status.Errorf(codes.NotFound, "not found") 61 return 62 } 63 64 return p.log(c, host, project, commitish, "", inputOptions) 65 } 66 67 func (p *implementation) log(c context.Context, host, project, commitish, ancestor string, inputOptions *LogOptions) (commits []*gitpb.Commit, err error) { 68 var opts LogOptions 69 if inputOptions != nil { 70 opts = *inputOptions 71 } 72 if opts.Limit <= 0 { 73 opts.Limit = 50 74 } 75 76 remaining := opts.Limit // defined as (limit - len(commits)) 77 // add appends page to commits and updates remaining. 78 // If page starts with commits' last commit, page's first commit 79 // is skipped. 80 add := func(page []*gitpb.Commit) { 81 switch { 82 case len(commits) == 0: 83 commits = page 84 case len(page) == 0: 85 // Do nothing. 86 default: 87 if page[0].Id != commits[len(commits)-1].Id { 88 panic(fmt.Sprintf( 89 "pages are not contiguous; want page start %q, got %q", 90 commits[len(commits)-1].Id, page[0].Id)) 91 } 92 commits = append(commits, page[1:]...) 93 } 94 remaining = opts.Limit - len(commits) 95 } 96 97 req := &logReq{ 98 factory: p, 99 host: host, 100 project: project, 101 commitish: commitish, 102 ancestor: ancestor, 103 withFiles: opts.WithFiles, 104 min: 100, 105 } 106 107 var page []*gitpb.Commit 108 109 // We may need to fetch >100 commits, but one logReq can handle only 100. 110 // Call it in a loop. 111 queryLoop: 112 for remaining > 0 { 113 required := remaining 114 if len(commits) > 0 { 115 // +1 because the first returned commit will be discarded. 116 required++ 117 } 118 119 // Don't query excessive commits. 120 if req.min > required { 121 req.min = required 122 } 123 page, err = req.call(c) 124 switch { 125 case err != nil: 126 return 127 case len(page) < 100: 128 // This can happen iff there are no more commits. 129 add(page) 130 break queryLoop 131 case len(page) > 100: 132 panic("impossible: logReq.call() returned >100 commits") 133 default: 134 // There may be more commits. 135 // Continue from the last fetched commit. 136 req.commitish = page[len(page)-1].Id 137 add(page) 138 } 139 } 140 141 // we may end up with more than we were asked for because 142 // gitReq's parameter is minimum, not limit. 143 if len(commits) > opts.Limit { 144 commits = commits[:opts.Limit] 145 } 146 return 147 } 148 149 // possible values for "result" field of logCounter metric below. 150 const ( 151 cacheHit = "hit" 152 cacheMiss = "miss" 153 cacheFailure = "failure-cache" 154 decodingFailure = "failure-decoding" 155 ) 156 157 var logCounter = metric.NewCounter( 158 "luci/milo/git/log/cache", 159 "The number of hits we get in git.Log", 160 nil, 161 field.String("result"), // for possible value see consts above. 162 field.String("host"), 163 field.String("project"), 164 field.String("ref")) 165 166 var latencyMetric = metric.NewCumulativeDistribution( 167 "luci/milo/git/log/latency", 168 "Gitiles response latency", 169 &types.MetricMetadata{Units: types.Milliseconds}, 170 distribution.DefaultBucketer, 171 field.Bool("treeDiff"), 172 field.String("host"), 173 field.String("project")) 174 175 var gitHash = regexp.MustCompile("^[0-9a-fA-F]{40}$") 176 177 // logReq is the implementation of Log(). 178 // 179 // Cached commits are keyed not only off of the requested commitish, 180 // but also ancestors, for example if received 100 ancestor 181 // commits C100, C99, .., C2, C1, 182 // cache with C99, C98, ... C1, too. 183 // 184 // The last byte of a cache value specifies how to interpret the rest: 185 // 186 // 0: the rest is protobuf-marshalled gitilespb.LogResponse message 187 // 1-99: the rest is byte id of a descendant commit that 188 // the commit-in-cache-key is reachable from. 189 // The last byte indicates the distance between the two commits. 190 // 100-255: reserved for future. 191 type logReq struct { 192 factory *implementation 193 host string 194 project string 195 commitish string 196 ancestor string 197 withFiles bool 198 min int // must be in [1..100] 199 200 // fields below are set in call() 201 202 commitishIsHash bool 203 commitishCacheKey string 204 commitishCacheExpiration time.Duration 205 } 206 207 func (l *logReq) call(c context.Context) ([]*gitpb.Commit, error) { 208 if l.min < 1 || l.min > 100 { 209 panic(fmt.Sprintf("invalid min %d", l.min)) 210 } 211 212 c = logging.SetFields(c, logging.Fields{ 213 "host": l.host, 214 "project": l.project, 215 "commitish": l.commitish, 216 }) 217 218 l.commitishIsHash = gitHash.MatchString(l.commitish) 219 l.commitishCacheKey = l.mkCacheKey(c, l.commitish) 220 l.commitishCacheExpiration = 12 * time.Hour 221 if !l.commitishIsHash { 222 // committish is not pinned, may move, so set a short expiration. 223 l.commitishCacheExpiration = 30 * time.Second 224 } 225 226 cacheResult := "" 227 defer func() { 228 ref := l.commitish 229 if l.commitishIsHash { 230 ref = "PINNED" 231 } 232 logCounter.Add(c, 1, cacheResult, l.host, l.project, ref) 233 }() 234 235 cacheResult, commits, ok := l.readCache(c) 236 if ok { 237 return commits, nil 238 } 239 240 // cache miss, cache failure or corrupted cache. 241 // Call Gitiles. 242 req := &gitilespb.LogRequest{ 243 Project: l.project, 244 Committish: l.commitish, 245 PageSize: 100, 246 TreeDiff: l.withFiles, 247 } 248 if l.ancestor != "" { 249 req.ExcludeAncestorsOf = l.ancestor 250 } 251 logging.Infof(c, "gitiles(%q).Log(%#v)", l.host, req) 252 client, err := l.factory.gitilesClient(c, l.host) 253 if err != nil { 254 return nil, err 255 } 256 start := clock.Now(c) 257 res, err := client.Log(c, req) 258 latency := float64(clock.Now(c).Sub(start).Nanoseconds()) / 1000000.0 259 logging.Debugf(c, "gitiles took %fms", latency) 260 latencyMetric.Add(c, latency, l.withFiles, l.host, l.project) 261 if err != nil { 262 return nil, errors.Annotate(err, "gitiles.Log").Err() 263 } 264 265 l.writeCache(c, res) 266 return res.Log, nil 267 } 268 269 func (l *logReq) readCache(c context.Context) (cacheResult string, commits []*gitpb.Commit, ok bool) { 270 conn, err := redisconn.Get(c) 271 if err != nil { 272 return cacheFailure, nil, false 273 } 274 defer conn.Close() 275 276 e := l.commitishCacheKey 277 dist := byte(0) 278 maxDist := byte(100 - l.min) 279 280 for { 281 switch bytes, err := redis.Bytes(conn.Do("GET", e)); { 282 case err == redis.ErrNil: 283 logging.Infof(c, "cache miss") 284 cacheResult = cacheMiss 285 286 case err != nil: 287 logging.WithError(err).Errorf(c, "cache failure") 288 cacheResult = cacheFailure 289 290 case len(bytes) == 0: 291 logging.WithError(err).Errorf(c, "empty cache value at key %q", e) 292 cacheResult = decodingFailure 293 default: 294 n := len(bytes) 295 // see logReq for cache value format. 296 data := bytes[:n-1] 297 meta := bytes[n-1] 298 switch { 299 case meta == 0: 300 var decoded gitilespb.LogResponse 301 if err := proto.Unmarshal(data, &decoded); err != nil { 302 logging.WithError(err).Errorf(c, "could not decode cached commits at key %q", e) 303 cacheResult = decodingFailure 304 } else { 305 cacheResult = cacheHit 306 commits = decoded.Log[dist:] 307 ok = true 308 } 309 310 case meta >= 100: 311 logging.WithError(err).Errorf(c, "unexpected last byte %d in cache value at key %q", meta, e) 312 cacheResult = decodingFailure 313 314 case dist+meta <= maxDist: 315 // meta is distance, and the referenced cache is sufficient for reuse 316 dist += meta 317 descendant := hex.EncodeToString(data) 318 logging.Debugf(c, "recursing into cache %s with distance %d", descendant, meta) 319 e = l.mkCacheKey(c, descendant) 320 // cacheResult is not set => continue the loop. 321 default: 322 // TODO(nodir): if we need commits C200..C100, but we have 323 // C200->C250 here (C250 entry has C250..C150), instead of 324 // discarding cache, reuse C200..C150 from C250 and fetch 325 // C150..C50. 326 logging.Debugf(c, "distance at key %q is too large", e) 327 cacheResult = cacheMiss 328 } 329 } 330 331 if cacheResult != "" { 332 return 333 } 334 } 335 } 336 337 func (l *logReq) writeCache(c context.Context, res *gitilespb.LogResponse) { 338 conn, err := redisconn.Get(c) 339 if err != nil { 340 logging.WithError(err).Errorf(c, "failed to get redis connection") 341 return 342 } 343 defer conn.Close() 344 345 updatedCacheCount := 0 346 if len(res.Log) > 1 { 347 // Also potentially cache with ancestors as cache keys. 348 ancestorCacheKeys := make([]any, 0, len(res.Log)-1) 349 for i := 1; i < len(res.Log) && len(res.Log[i-1].Parents) == 1; i++ { 350 ancestorCacheKeys = append(ancestorCacheKeys, l.mkCacheKey(c, res.Log[i].Id)) 351 } 352 ancestorCacheValues, err := redis.ByteSlices(conn.Do("MGET", ancestorCacheKeys...)) 353 if err != nil { 354 logging.WithError(err).Errorf(c, "Failed to retrieve cache entry") 355 } 356 357 topCommitID, err := hex.DecodeString(res.Log[0].Id) 358 if err != nil { 359 logging.WithError(err).Errorf(c, "commit id %q is not a valid hex", res.Log[0].Id) 360 } else { 361 conn.Send("MULTI") 362 363 // see logReq comment for cache value format. 364 cacheValue, err := proto.Marshal(res) 365 if err != nil { 366 logging.WithError(err).Errorf(c, "failed to marshal gitiles log %s", res) 367 return 368 } 369 cacheValue = append(cacheValue, 0) 370 371 // cache with the commitish as cache key. 372 conn.Send("SET", l.commitishCacheKey, cacheValue, "EX", l.commitishCacheExpiration.Seconds()) 373 updatedCacheCount += 1 374 375 // cache with the commit hash as cache key too. 376 cacheKey := l.mkCacheKey(c, res.Log[0].Id) 377 if cacheKey != l.commitishCacheKey { 378 conn.Send("SET", cacheKey, cacheValue, "EX", (12 * time.Hour).Seconds()) 379 updatedCacheCount += 1 380 } 381 382 for i, cacheValue := range ancestorCacheValues { 383 dist := byte(i + 1) 384 if len(cacheValue) > 0 && cacheValue[len(cacheValue)-1] <= dist { 385 // This cache entry is not worse than what we can offer. 386 continue 387 } 388 // We have data with a shorter distance. 389 // see logReq comment for format of this cache value. 390 cacheValue = make([]byte, len(topCommitID)+1) 391 copy(cacheValue, topCommitID) 392 cacheValue[len(cacheValue)-1] = dist 393 conn.Send("SET", ancestorCacheKeys[i].(string), cacheValue, "EX", (12 * time.Hour).Seconds()) 394 updatedCacheCount += 1 395 } 396 } 397 } 398 399 // This could be potentially improved by using WATCH or Lua script, 400 // but it would significantly complicate this code. 401 _, err = conn.Do("EXEC") 402 if err != nil { 403 logging.WithError(err).Errorf(c, "Failed to cache gitiles log") 404 } else { 405 logging.Debugf(c, "wrote %d entries", updatedCacheCount) 406 } 407 } 408 409 func (l *logReq) mkCacheKey(c context.Context, commitish string) string { 410 // note: better not to include limit in the cache key. 411 return fmt.Sprintf("git-log-%s|%s|%s|%s|%t", l.host, l.project, commitish, 412 l.ancestor, l.withFiles) 413 }