github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/log_name_status.go (about) 1 // Copyright 2023 The GitBundle Inc. All rights reserved. 2 // Copyright 2017 The Gitea Authors. All rights reserved. 3 // Use of this source code is governed by a MIT-style 4 // license that can be found in the LICENSE file. 5 6 package git 7 8 import ( 9 "bufio" 10 "bytes" 11 "context" 12 "errors" 13 "io" 14 "path" 15 "sort" 16 "strings" 17 18 "github.com/djherbis/buffer" 19 "github.com/djherbis/nio/v3" 20 ) 21 22 // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function 23 func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) { 24 // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. 25 // so let's create a batch stdin and stdout 26 stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024)) 27 28 // Lets also create a context so that we can absolutely ensure that the command should die when we're done 29 ctx, ctxCancel := context.WithCancel(ctx) 30 31 cancel := func() { 32 ctxCancel() 33 _ = stdoutReader.Close() 34 _ = stdoutWriter.Close() 35 } 36 37 args := make([]string, 0, 8+len(paths)) 38 args = append(args, "log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z", head, "--") 39 if len(paths) < 70 { 40 if treepath != "" { 41 args = append(args, treepath) 42 for _, pth := range paths { 43 if pth != "" { 44 args = append(args, path.Join(treepath, pth)) 45 } 46 } 47 } else { 48 for _, pth := range paths { 49 if pth != "" { 50 args = append(args, pth) 51 } 52 } 53 } 54 } else if treepath != "" { 55 args = append(args, treepath) 56 } 57 58 go func() { 59 stderr := strings.Builder{} 60 err := NewCommand(ctx, args...).Run(&RunOpts{ 61 Dir: repository, 62 Stdout: stdoutWriter, 63 Stderr: &stderr, 64 }) 65 if err != nil { 66 _ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 67 return 68 } 69 70 _ = stdoutWriter.Close() 71 }() 72 73 // For simplicities sake we'll us a buffered reader to read from the cat-file --batch 74 bufReader := bufio.NewReaderSize(stdoutReader, 32*1024) 75 76 return bufReader, cancel 77 } 78 79 // LogNameStatusRepoParser parses a git log raw output from LogRawRepo 80 type LogNameStatusRepoParser struct { 81 treepath string 82 paths []string 83 next []byte 84 buffull bool 85 rd *bufio.Reader 86 cancel func() 87 } 88 89 // NewLogNameStatusRepoParser returns a new parser for a git log raw output 90 func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser { 91 rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...) 92 return &LogNameStatusRepoParser{ 93 treepath: treepath, 94 paths: paths, 95 rd: rd, 96 cancel: cancel, 97 } 98 } 99 100 // LogNameStatusCommitData represents a commit artefact from git log raw 101 type LogNameStatusCommitData struct { 102 CommitID string 103 ParentIDs []string 104 Paths []bool 105 } 106 107 // Next returns the next LogStatusCommitData 108 func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) { 109 var err error 110 if g.next == nil || len(g.next) == 0 { 111 g.buffull = false 112 g.next, err = g.rd.ReadSlice('\x00') 113 if err != nil { 114 if err == bufio.ErrBufferFull { 115 g.buffull = true 116 } else if err == io.EOF { 117 return nil, nil 118 } else { 119 return nil, err 120 } 121 } 122 } 123 124 ret := LogNameStatusCommitData{} 125 if bytes.Equal(g.next, []byte("commit\000")) { 126 g.next, err = g.rd.ReadSlice('\x00') 127 if err != nil { 128 if err == bufio.ErrBufferFull { 129 g.buffull = true 130 } else if err == io.EOF { 131 return nil, nil 132 } else { 133 return nil, err 134 } 135 } 136 } 137 138 // Our "line" must look like: <commitid> SP (<parent> SP) * NUL 139 ret.CommitID = string(g.next[0:40]) 140 parents := string(g.next[41:]) 141 if g.buffull { 142 more, err := g.rd.ReadString('\x00') 143 if err != nil { 144 return nil, err 145 } 146 parents += more 147 } 148 parents = parents[:len(parents)-1] 149 ret.ParentIDs = strings.Split(parents, " ") 150 151 // now read the next "line" 152 g.buffull = false 153 g.next, err = g.rd.ReadSlice('\x00') 154 if err != nil { 155 if err == bufio.ErrBufferFull { 156 g.buffull = true 157 } else if err != io.EOF { 158 return nil, err 159 } 160 } 161 162 if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') { 163 return &ret, nil 164 } 165 166 // Ok we have some changes. 167 // This line will look like: NL <fname> NUL 168 // 169 // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too. 170 if g.next[0] == '\n' { 171 g.next = g.next[1:] 172 } else { 173 g.buffull = false 174 g.next, err = g.rd.ReadSlice('\x00') 175 if err != nil { 176 if err == bufio.ErrBufferFull { 177 g.buffull = true 178 } else if err != io.EOF { 179 return nil, err 180 } 181 } 182 if len(g.next) == 0 { 183 return &ret, nil 184 } 185 if g.next[0] == '\x00' { 186 g.buffull = false 187 g.next, err = g.rd.ReadSlice('\x00') 188 if err != nil { 189 if err == bufio.ErrBufferFull { 190 g.buffull = true 191 } else if err != io.EOF { 192 return nil, err 193 } 194 } 195 } 196 } 197 198 fnameBuf := make([]byte, 4096) 199 200 diffloop: 201 for { 202 if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) { 203 return &ret, nil 204 } 205 g.next, err = g.rd.ReadSlice('\x00') 206 if err != nil { 207 if err == bufio.ErrBufferFull { 208 g.buffull = true 209 } else if err == io.EOF { 210 return &ret, nil 211 } else { 212 return nil, err 213 } 214 } 215 copy(fnameBuf, g.next) 216 if len(fnameBuf) < len(g.next) { 217 fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...) 218 } else { 219 fnameBuf = fnameBuf[:len(g.next)] 220 } 221 if err != nil { 222 if err != bufio.ErrBufferFull { 223 return nil, err 224 } 225 more, err := g.rd.ReadBytes('\x00') 226 if err != nil { 227 return nil, err 228 } 229 fnameBuf = append(fnameBuf, more...) 230 } 231 232 // read the next line 233 g.buffull = false 234 g.next, err = g.rd.ReadSlice('\x00') 235 if err != nil { 236 if err == bufio.ErrBufferFull { 237 g.buffull = true 238 } else if err != io.EOF { 239 return nil, err 240 } 241 } 242 243 if treepath != "" { 244 if !bytes.HasPrefix(fnameBuf, []byte(treepath)) { 245 fnameBuf = fnameBuf[:cap(fnameBuf)] 246 continue diffloop 247 } 248 } 249 fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1] 250 if len(fnameBuf) > maxpathlen { 251 fnameBuf = fnameBuf[:cap(fnameBuf)] 252 continue diffloop 253 } 254 if len(fnameBuf) > 0 { 255 if len(treepath) > 0 { 256 if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 { 257 fnameBuf = fnameBuf[:cap(fnameBuf)] 258 continue diffloop 259 } 260 fnameBuf = fnameBuf[1:] 261 } else if bytes.IndexByte(fnameBuf, '/') >= 0 { 262 fnameBuf = fnameBuf[:cap(fnameBuf)] 263 continue diffloop 264 } 265 } 266 267 idx, ok := paths2ids[string(fnameBuf)] 268 if !ok { 269 fnameBuf = fnameBuf[:cap(fnameBuf)] 270 continue diffloop 271 } 272 if ret.Paths == nil { 273 ret.Paths = changed 274 } 275 changed[idx] = true 276 } 277 } 278 279 // Close closes the parser 280 func (g *LogNameStatusRepoParser) Close() { 281 g.cancel() 282 } 283 284 // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files 285 func WalkGitLog(ctx context.Context, cache *LastCommitCache, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) { 286 headRef := head.ID.String() 287 288 tree, err := head.SubTree(treepath) 289 if err != nil { 290 return nil, err 291 } 292 293 entries, err := tree.ListEntries() 294 if err != nil { 295 return nil, err 296 } 297 298 if len(paths) == 0 { 299 paths = make([]string, 0, len(entries)+1) 300 paths = append(paths, "") 301 for _, entry := range entries { 302 paths = append(paths, entry.Name()) 303 } 304 } else { 305 sort.Strings(paths) 306 if paths[0] != "" { 307 paths = append([]string{""}, paths...) 308 } 309 // remove duplicates 310 for i := len(paths) - 1; i > 0; i-- { 311 if paths[i] == paths[i-1] { 312 paths = append(paths[:i-1], paths[i:]...) 313 } 314 } 315 } 316 317 path2idx := map[string]int{} 318 maxpathlen := len(treepath) 319 320 for i := range paths { 321 path2idx[paths[i]] = i 322 pthlen := len(paths[i]) + len(treepath) + 1 323 if pthlen > maxpathlen { 324 maxpathlen = pthlen 325 } 326 } 327 328 g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...) 329 // don't use defer g.Close() here as g may change its value - instead wrap in a func 330 defer func() { 331 g.Close() 332 }() 333 334 results := make([]string, len(paths)) 335 remaining := len(paths) 336 nextRestart := (len(paths) * 3) / 4 337 if nextRestart > 70 { 338 nextRestart = 70 339 } 340 lastEmptyParent := head.ID.String() 341 commitSinceLastEmptyParent := uint64(0) 342 commitSinceNextRestart := uint64(0) 343 parentRemaining := map[string]bool{} 344 345 changed := make([]bool, len(paths)) 346 347 heaploop: 348 for { 349 select { 350 case <-ctx.Done(): 351 if ctx.Err() == context.DeadlineExceeded { 352 break heaploop 353 } 354 g.Close() 355 return nil, ctx.Err() 356 default: 357 } 358 current, err := g.Next(treepath, path2idx, changed, maxpathlen) 359 if err != nil { 360 if errors.Is(err, context.DeadlineExceeded) { 361 break heaploop 362 } 363 g.Close() 364 return nil, err 365 } 366 if current == nil { 367 break heaploop 368 } 369 delete(parentRemaining, current.CommitID) 370 if current.Paths != nil { 371 for i, found := range current.Paths { 372 if !found { 373 continue 374 } 375 changed[i] = false 376 if results[i] == "" { 377 results[i] = current.CommitID 378 if err := cache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil { 379 return nil, err 380 } 381 delete(path2idx, paths[i]) 382 remaining-- 383 if results[0] == "" { 384 results[0] = current.CommitID 385 if err := cache.Put(headRef, treepath, current.CommitID); err != nil { 386 return nil, err 387 } 388 delete(path2idx, "") 389 remaining-- 390 } 391 } 392 } 393 } 394 395 if remaining <= 0 { 396 break heaploop 397 } 398 commitSinceLastEmptyParent++ 399 if len(parentRemaining) == 0 { 400 lastEmptyParent = current.CommitID 401 commitSinceLastEmptyParent = 0 402 } 403 if remaining <= nextRestart { 404 commitSinceNextRestart++ 405 if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent { 406 g.Close() 407 remainingPaths := make([]string, 0, len(paths)) 408 for i, pth := range paths { 409 if results[i] == "" { 410 remainingPaths = append(remainingPaths, pth) 411 } 412 } 413 g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...) 414 parentRemaining = map[string]bool{} 415 nextRestart = (remaining * 3) / 4 416 continue heaploop 417 } 418 } 419 for _, parent := range current.ParentIDs { 420 parentRemaining[parent] = true 421 } 422 } 423 g.Close() 424 425 resultsMap := map[string]string{} 426 for i, pth := range paths { 427 resultsMap[pth] = results[i] 428 } 429 430 return resultsMap, nil 431 }