code.gitea.io/gitea@v1.22.3/modules/git/log_name_status.go (about) 1 // Copyright 2021 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package git 5 6 import ( 7 "bufio" 8 "bytes" 9 "context" 10 "errors" 11 "io" 12 "path" 13 "sort" 14 "strings" 15 16 "code.gitea.io/gitea/modules/container" 17 18 "github.com/djherbis/buffer" 19 "github.com/djherbis/nio/v3" 20 ) 21 22 // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function 23 func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) { 24 // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. 25 // so let's create a batch stdin and stdout 26 stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024)) 27 28 // Lets also create a context so that we can absolutely ensure that the command should die when we're done 29 ctx, ctxCancel := context.WithCancel(ctx) 30 31 cancel := func() { 32 ctxCancel() 33 _ = stdoutReader.Close() 34 _ = stdoutWriter.Close() 35 } 36 37 cmd := NewCommand(ctx) 38 cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head) 39 40 var files []string 41 if len(paths) < 70 { 42 if treepath != "" { 43 files = append(files, treepath) 44 for _, pth := range paths { 45 if pth != "" { 46 files = append(files, path.Join(treepath, pth)) 47 } 48 } 49 } else { 50 for _, pth := range paths { 51 if pth != "" { 52 files = append(files, pth) 53 } 54 } 55 } 56 } else if treepath != "" { 57 files = append(files, treepath) 58 } 59 // Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg" 60 for i, file := range files { 61 files[i] = ":(literal)" + file 62 } 63 cmd.AddDashesAndList(files...) 64 65 go func() { 66 stderr := strings.Builder{} 67 err := cmd.Run(&RunOpts{ 68 Dir: repository, 69 Stdout: stdoutWriter, 70 Stderr: &stderr, 71 }) 72 if err != nil { 73 _ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 74 return 75 } 76 77 _ = stdoutWriter.Close() 78 }() 79 80 // For simplicities sake we'll us a buffered reader to read from the cat-file --batch 81 bufReader := bufio.NewReaderSize(stdoutReader, 32*1024) 82 83 return bufReader, cancel 84 } 85 86 // LogNameStatusRepoParser parses a git log raw output from LogRawRepo 87 type LogNameStatusRepoParser struct { 88 treepath string 89 paths []string 90 next []byte 91 buffull bool 92 rd *bufio.Reader 93 cancel func() 94 } 95 96 // NewLogNameStatusRepoParser returns a new parser for a git log raw output 97 func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser { 98 rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...) 99 return &LogNameStatusRepoParser{ 100 treepath: treepath, 101 paths: paths, 102 rd: rd, 103 cancel: cancel, 104 } 105 } 106 107 // LogNameStatusCommitData represents a commit artefact from git log raw 108 type LogNameStatusCommitData struct { 109 CommitID string 110 ParentIDs []string 111 Paths []bool 112 } 113 114 // Next returns the next LogStatusCommitData 115 func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) { 116 var err error 117 if g.next == nil || len(g.next) == 0 { 118 g.buffull = false 119 g.next, err = g.rd.ReadSlice('\x00') 120 if err != nil { 121 if err == bufio.ErrBufferFull { 122 g.buffull = true 123 } else if err == io.EOF { 124 return nil, nil 125 } else { 126 return nil, err 127 } 128 } 129 } 130 131 ret := LogNameStatusCommitData{} 132 if bytes.Equal(g.next, []byte("commit\000")) { 133 g.next, err = g.rd.ReadSlice('\x00') 134 if err != nil { 135 if err == bufio.ErrBufferFull { 136 g.buffull = true 137 } else if err == io.EOF { 138 return nil, nil 139 } else { 140 return nil, err 141 } 142 } 143 } 144 145 // Our "line" must look like: <commitid> SP (<parent> SP) * NUL 146 commitIDs := string(g.next) 147 if g.buffull { 148 more, err := g.rd.ReadString('\x00') 149 if err != nil { 150 return nil, err 151 } 152 commitIDs += more 153 } 154 commitIDs = commitIDs[:len(commitIDs)-1] 155 splitIDs := strings.Split(commitIDs, " ") 156 ret.CommitID = splitIDs[0] 157 if len(splitIDs) > 1 { 158 ret.ParentIDs = splitIDs[1:] 159 } 160 161 // now read the next "line" 162 g.buffull = false 163 g.next, err = g.rd.ReadSlice('\x00') 164 if err != nil { 165 if err == bufio.ErrBufferFull { 166 g.buffull = true 167 } else if err != io.EOF { 168 return nil, err 169 } 170 } 171 172 if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') { 173 return &ret, nil 174 } 175 176 // Ok we have some changes. 177 // This line will look like: NL <fname> NUL 178 // 179 // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too. 180 if g.next[0] == '\n' { 181 g.next = g.next[1:] 182 } else { 183 g.buffull = false 184 g.next, err = g.rd.ReadSlice('\x00') 185 if err != nil { 186 if err == bufio.ErrBufferFull { 187 g.buffull = true 188 } else if err != io.EOF { 189 return nil, err 190 } 191 } 192 if len(g.next) == 0 { 193 return &ret, nil 194 } 195 if g.next[0] == '\x00' { 196 g.buffull = false 197 g.next, err = g.rd.ReadSlice('\x00') 198 if err != nil { 199 if err == bufio.ErrBufferFull { 200 g.buffull = true 201 } else if err != io.EOF { 202 return nil, err 203 } 204 } 205 } 206 } 207 208 fnameBuf := make([]byte, 4096) 209 210 diffloop: 211 for { 212 if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) { 213 return &ret, nil 214 } 215 g.next, err = g.rd.ReadSlice('\x00') 216 if err != nil { 217 if err == bufio.ErrBufferFull { 218 g.buffull = true 219 } else if err == io.EOF { 220 return &ret, nil 221 } else { 222 return nil, err 223 } 224 } 225 copy(fnameBuf, g.next) 226 if len(fnameBuf) < len(g.next) { 227 fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...) 228 } else { 229 fnameBuf = fnameBuf[:len(g.next)] 230 } 231 if err != nil { 232 if err != bufio.ErrBufferFull { 233 return nil, err 234 } 235 more, err := g.rd.ReadBytes('\x00') 236 if err != nil { 237 return nil, err 238 } 239 fnameBuf = append(fnameBuf, more...) 240 } 241 242 // read the next line 243 g.buffull = false 244 g.next, err = g.rd.ReadSlice('\x00') 245 if err != nil { 246 if err == bufio.ErrBufferFull { 247 g.buffull = true 248 } else if err != io.EOF { 249 return nil, err 250 } 251 } 252 253 if treepath != "" { 254 if !bytes.HasPrefix(fnameBuf, []byte(treepath)) { 255 fnameBuf = fnameBuf[:cap(fnameBuf)] 256 continue diffloop 257 } 258 } 259 fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1] 260 if len(fnameBuf) > maxpathlen { 261 fnameBuf = fnameBuf[:cap(fnameBuf)] 262 continue diffloop 263 } 264 if len(fnameBuf) > 0 { 265 if len(treepath) > 0 { 266 if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 { 267 fnameBuf = fnameBuf[:cap(fnameBuf)] 268 continue diffloop 269 } 270 fnameBuf = fnameBuf[1:] 271 } else if bytes.IndexByte(fnameBuf, '/') >= 0 { 272 fnameBuf = fnameBuf[:cap(fnameBuf)] 273 continue diffloop 274 } 275 } 276 277 idx, ok := paths2ids[string(fnameBuf)] 278 if !ok { 279 fnameBuf = fnameBuf[:cap(fnameBuf)] 280 continue diffloop 281 } 282 if ret.Paths == nil { 283 ret.Paths = changed 284 } 285 changed[idx] = true 286 } 287 } 288 289 // Close closes the parser 290 func (g *LogNameStatusRepoParser) Close() { 291 g.cancel() 292 } 293 294 // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files 295 func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) { 296 headRef := head.ID.String() 297 298 tree, err := head.SubTree(treepath) 299 if err != nil { 300 return nil, err 301 } 302 303 entries, err := tree.ListEntries() 304 if err != nil { 305 return nil, err 306 } 307 308 if len(paths) == 0 { 309 paths = make([]string, 0, len(entries)+1) 310 paths = append(paths, "") 311 for _, entry := range entries { 312 paths = append(paths, entry.Name()) 313 } 314 } else { 315 sort.Strings(paths) 316 if paths[0] != "" { 317 paths = append([]string{""}, paths...) 318 } 319 // remove duplicates 320 for i := len(paths) - 1; i > 0; i-- { 321 if paths[i] == paths[i-1] { 322 paths = append(paths[:i-1], paths[i:]...) 323 } 324 } 325 } 326 327 path2idx := map[string]int{} 328 maxpathlen := len(treepath) 329 330 for i := range paths { 331 path2idx[paths[i]] = i 332 pthlen := len(paths[i]) + len(treepath) + 1 333 if pthlen > maxpathlen { 334 maxpathlen = pthlen 335 } 336 } 337 338 g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...) 339 // don't use defer g.Close() here as g may change its value - instead wrap in a func 340 defer func() { 341 g.Close() 342 }() 343 344 results := make([]string, len(paths)) 345 remaining := len(paths) 346 nextRestart := (len(paths) * 3) / 4 347 if nextRestart > 70 { 348 nextRestart = 70 349 } 350 lastEmptyParent := head.ID.String() 351 commitSinceLastEmptyParent := uint64(0) 352 commitSinceNextRestart := uint64(0) 353 parentRemaining := make(container.Set[string]) 354 355 changed := make([]bool, len(paths)) 356 357 heaploop: 358 for { 359 select { 360 case <-ctx.Done(): 361 if ctx.Err() == context.DeadlineExceeded { 362 break heaploop 363 } 364 g.Close() 365 return nil, ctx.Err() 366 default: 367 } 368 current, err := g.Next(treepath, path2idx, changed, maxpathlen) 369 if err != nil { 370 if errors.Is(err, context.DeadlineExceeded) { 371 break heaploop 372 } 373 g.Close() 374 return nil, err 375 } 376 if current == nil { 377 break heaploop 378 } 379 parentRemaining.Remove(current.CommitID) 380 for i, found := range current.Paths { 381 if !found { 382 continue 383 } 384 changed[i] = false 385 if results[i] == "" { 386 results[i] = current.CommitID 387 if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil { 388 return nil, err 389 } 390 delete(path2idx, paths[i]) 391 remaining-- 392 if results[0] == "" { 393 results[0] = current.CommitID 394 if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil { 395 return nil, err 396 } 397 delete(path2idx, "") 398 remaining-- 399 } 400 } 401 } 402 403 if remaining <= 0 { 404 break heaploop 405 } 406 commitSinceLastEmptyParent++ 407 if len(parentRemaining) == 0 { 408 lastEmptyParent = current.CommitID 409 commitSinceLastEmptyParent = 0 410 } 411 if remaining <= nextRestart { 412 commitSinceNextRestart++ 413 if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent { 414 g.Close() 415 remainingPaths := make([]string, 0, len(paths)) 416 for i, pth := range paths { 417 if results[i] == "" { 418 remainingPaths = append(remainingPaths, pth) 419 } 420 } 421 g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...) 422 parentRemaining = make(container.Set[string]) 423 nextRestart = (remaining * 3) / 4 424 continue heaploop 425 } 426 } 427 parentRemaining.AddMultiple(current.ParentIDs...) 428 } 429 g.Close() 430 431 resultsMap := map[string]string{} 432 for i, pth := range paths { 433 resultsMap[pth] = results[i] 434 } 435 436 return resultsMap, nil 437 }