code.gitea.io/gitea@v1.19.3/modules/git/log_name_status.go (about) 1 // Copyright 2021 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package git 5 6 import ( 7 "bufio" 8 "bytes" 9 "context" 10 "errors" 11 "io" 12 "path" 13 "sort" 14 "strings" 15 16 "code.gitea.io/gitea/modules/container" 17 18 "github.com/djherbis/buffer" 19 "github.com/djherbis/nio/v3" 20 ) 21 22 // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function 23 func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) { 24 // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. 25 // so let's create a batch stdin and stdout 26 stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024)) 27 28 // Lets also create a context so that we can absolutely ensure that the command should die when we're done 29 ctx, ctxCancel := context.WithCancel(ctx) 30 31 cancel := func() { 32 ctxCancel() 33 _ = stdoutReader.Close() 34 _ = stdoutWriter.Close() 35 } 36 37 cmd := NewCommand(ctx) 38 cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head) 39 40 var files []string 41 if len(paths) < 70 { 42 if treepath != "" { 43 files = append(files, treepath) 44 for _, pth := range paths { 45 if pth != "" { 46 files = append(files, path.Join(treepath, pth)) 47 } 48 } 49 } else { 50 for _, pth := range paths { 51 if pth != "" { 52 files = append(files, pth) 53 } 54 } 55 } 56 } else if treepath != "" { 57 files = append(files, treepath) 58 } 59 cmd.AddDashesAndList(files...) 60 61 go func() { 62 stderr := strings.Builder{} 63 err := cmd.Run(&RunOpts{ 64 Dir: repository, 65 Stdout: stdoutWriter, 66 Stderr: &stderr, 67 }) 68 if err != nil { 69 _ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 70 return 71 } 72 73 _ = stdoutWriter.Close() 74 }() 75 76 // For simplicities sake we'll us a buffered reader to read from the cat-file --batch 77 bufReader := bufio.NewReaderSize(stdoutReader, 32*1024) 78 79 return bufReader, cancel 80 } 81 82 // LogNameStatusRepoParser parses a git log raw output from LogRawRepo 83 type LogNameStatusRepoParser struct { 84 treepath string 85 paths []string 86 next []byte 87 buffull bool 88 rd *bufio.Reader 89 cancel func() 90 } 91 92 // NewLogNameStatusRepoParser returns a new parser for a git log raw output 93 func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser { 94 rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...) 95 return &LogNameStatusRepoParser{ 96 treepath: treepath, 97 paths: paths, 98 rd: rd, 99 cancel: cancel, 100 } 101 } 102 103 // LogNameStatusCommitData represents a commit artefact from git log raw 104 type LogNameStatusCommitData struct { 105 CommitID string 106 ParentIDs []string 107 Paths []bool 108 } 109 110 // Next returns the next LogStatusCommitData 111 func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) { 112 var err error 113 if g.next == nil || len(g.next) == 0 { 114 g.buffull = false 115 g.next, err = g.rd.ReadSlice('\x00') 116 if err != nil { 117 if err == bufio.ErrBufferFull { 118 g.buffull = true 119 } else if err == io.EOF { 120 return nil, nil 121 } else { 122 return nil, err 123 } 124 } 125 } 126 127 ret := LogNameStatusCommitData{} 128 if bytes.Equal(g.next, []byte("commit\000")) { 129 g.next, err = g.rd.ReadSlice('\x00') 130 if err != nil { 131 if err == bufio.ErrBufferFull { 132 g.buffull = true 133 } else if err == io.EOF { 134 return nil, nil 135 } else { 136 return nil, err 137 } 138 } 139 } 140 141 // Our "line" must look like: <commitid> SP (<parent> SP) * NUL 142 ret.CommitID = string(g.next[0:40]) 143 parents := string(g.next[41:]) 144 if g.buffull { 145 more, err := g.rd.ReadString('\x00') 146 if err != nil { 147 return nil, err 148 } 149 parents += more 150 } 151 parents = parents[:len(parents)-1] 152 ret.ParentIDs = strings.Split(parents, " ") 153 154 // now read the next "line" 155 g.buffull = false 156 g.next, err = g.rd.ReadSlice('\x00') 157 if err != nil { 158 if err == bufio.ErrBufferFull { 159 g.buffull = true 160 } else if err != io.EOF { 161 return nil, err 162 } 163 } 164 165 if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') { 166 return &ret, nil 167 } 168 169 // Ok we have some changes. 170 // This line will look like: NL <fname> NUL 171 // 172 // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too. 173 if g.next[0] == '\n' { 174 g.next = g.next[1:] 175 } else { 176 g.buffull = false 177 g.next, err = g.rd.ReadSlice('\x00') 178 if err != nil { 179 if err == bufio.ErrBufferFull { 180 g.buffull = true 181 } else if err != io.EOF { 182 return nil, err 183 } 184 } 185 if len(g.next) == 0 { 186 return &ret, nil 187 } 188 if g.next[0] == '\x00' { 189 g.buffull = false 190 g.next, err = g.rd.ReadSlice('\x00') 191 if err != nil { 192 if err == bufio.ErrBufferFull { 193 g.buffull = true 194 } else if err != io.EOF { 195 return nil, err 196 } 197 } 198 } 199 } 200 201 fnameBuf := make([]byte, 4096) 202 203 diffloop: 204 for { 205 if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) { 206 return &ret, nil 207 } 208 g.next, err = g.rd.ReadSlice('\x00') 209 if err != nil { 210 if err == bufio.ErrBufferFull { 211 g.buffull = true 212 } else if err == io.EOF { 213 return &ret, nil 214 } else { 215 return nil, err 216 } 217 } 218 copy(fnameBuf, g.next) 219 if len(fnameBuf) < len(g.next) { 220 fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...) 221 } else { 222 fnameBuf = fnameBuf[:len(g.next)] 223 } 224 if err != nil { 225 if err != bufio.ErrBufferFull { 226 return nil, err 227 } 228 more, err := g.rd.ReadBytes('\x00') 229 if err != nil { 230 return nil, err 231 } 232 fnameBuf = append(fnameBuf, more...) 233 } 234 235 // read the next line 236 g.buffull = false 237 g.next, err = g.rd.ReadSlice('\x00') 238 if err != nil { 239 if err == bufio.ErrBufferFull { 240 g.buffull = true 241 } else if err != io.EOF { 242 return nil, err 243 } 244 } 245 246 if treepath != "" { 247 if !bytes.HasPrefix(fnameBuf, []byte(treepath)) { 248 fnameBuf = fnameBuf[:cap(fnameBuf)] 249 continue diffloop 250 } 251 } 252 fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1] 253 if len(fnameBuf) > maxpathlen { 254 fnameBuf = fnameBuf[:cap(fnameBuf)] 255 continue diffloop 256 } 257 if len(fnameBuf) > 0 { 258 if len(treepath) > 0 { 259 if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 { 260 fnameBuf = fnameBuf[:cap(fnameBuf)] 261 continue diffloop 262 } 263 fnameBuf = fnameBuf[1:] 264 } else if bytes.IndexByte(fnameBuf, '/') >= 0 { 265 fnameBuf = fnameBuf[:cap(fnameBuf)] 266 continue diffloop 267 } 268 } 269 270 idx, ok := paths2ids[string(fnameBuf)] 271 if !ok { 272 fnameBuf = fnameBuf[:cap(fnameBuf)] 273 continue diffloop 274 } 275 if ret.Paths == nil { 276 ret.Paths = changed 277 } 278 changed[idx] = true 279 } 280 } 281 282 // Close closes the parser 283 func (g *LogNameStatusRepoParser) Close() { 284 g.cancel() 285 } 286 287 // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files 288 func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) { 289 headRef := head.ID.String() 290 291 tree, err := head.SubTree(treepath) 292 if err != nil { 293 return nil, err 294 } 295 296 entries, err := tree.ListEntries() 297 if err != nil { 298 return nil, err 299 } 300 301 if len(paths) == 0 { 302 paths = make([]string, 0, len(entries)+1) 303 paths = append(paths, "") 304 for _, entry := range entries { 305 paths = append(paths, entry.Name()) 306 } 307 } else { 308 sort.Strings(paths) 309 if paths[0] != "" { 310 paths = append([]string{""}, paths...) 311 } 312 // remove duplicates 313 for i := len(paths) - 1; i > 0; i-- { 314 if paths[i] == paths[i-1] { 315 paths = append(paths[:i-1], paths[i:]...) 316 } 317 } 318 } 319 320 path2idx := map[string]int{} 321 maxpathlen := len(treepath) 322 323 for i := range paths { 324 path2idx[paths[i]] = i 325 pthlen := len(paths[i]) + len(treepath) + 1 326 if pthlen > maxpathlen { 327 maxpathlen = pthlen 328 } 329 } 330 331 g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...) 332 // don't use defer g.Close() here as g may change its value - instead wrap in a func 333 defer func() { 334 g.Close() 335 }() 336 337 results := make([]string, len(paths)) 338 remaining := len(paths) 339 nextRestart := (len(paths) * 3) / 4 340 if nextRestart > 70 { 341 nextRestart = 70 342 } 343 lastEmptyParent := head.ID.String() 344 commitSinceLastEmptyParent := uint64(0) 345 commitSinceNextRestart := uint64(0) 346 parentRemaining := make(container.Set[string]) 347 348 changed := make([]bool, len(paths)) 349 350 heaploop: 351 for { 352 select { 353 case <-ctx.Done(): 354 if ctx.Err() == context.DeadlineExceeded { 355 break heaploop 356 } 357 g.Close() 358 return nil, ctx.Err() 359 default: 360 } 361 current, err := g.Next(treepath, path2idx, changed, maxpathlen) 362 if err != nil { 363 if errors.Is(err, context.DeadlineExceeded) { 364 break heaploop 365 } 366 g.Close() 367 return nil, err 368 } 369 if current == nil { 370 break heaploop 371 } 372 parentRemaining.Remove(current.CommitID) 373 if current.Paths != nil { 374 for i, found := range current.Paths { 375 if !found { 376 continue 377 } 378 changed[i] = false 379 if results[i] == "" { 380 results[i] = current.CommitID 381 if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil { 382 return nil, err 383 } 384 delete(path2idx, paths[i]) 385 remaining-- 386 if results[0] == "" { 387 results[0] = current.CommitID 388 if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil { 389 return nil, err 390 } 391 delete(path2idx, "") 392 remaining-- 393 } 394 } 395 } 396 } 397 398 if remaining <= 0 { 399 break heaploop 400 } 401 commitSinceLastEmptyParent++ 402 if len(parentRemaining) == 0 { 403 lastEmptyParent = current.CommitID 404 commitSinceLastEmptyParent = 0 405 } 406 if remaining <= nextRestart { 407 commitSinceNextRestart++ 408 if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent { 409 g.Close() 410 remainingPaths := make([]string, 0, len(paths)) 411 for i, pth := range paths { 412 if results[i] == "" { 413 remainingPaths = append(remainingPaths, pth) 414 } 415 } 416 g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...) 417 parentRemaining = make(container.Set[string]) 418 nextRestart = (remaining * 3) / 4 419 continue heaploop 420 } 421 } 422 parentRemaining.AddMultiple(current.ParentIDs...) 423 } 424 g.Close() 425 426 resultsMap := map[string]string{} 427 for i, pth := range paths { 428 resultsMap[pth] = results[i] 429 } 430 431 return resultsMap, nil 432 }