github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/log_name_status.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  package git
     7  
     8  import (
     9  	"bufio"
    10  	"bytes"
    11  	"context"
    12  	"errors"
    13  	"io"
    14  	"path"
    15  	"sort"
    16  	"strings"
    17  
    18  	"github.com/djherbis/buffer"
    19  	"github.com/djherbis/nio/v3"
    20  )
    21  
    22  // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    23  func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
    24  	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
    25  	// so let's create a batch stdin and stdout
    26  	stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024))
    27  
    28  	// Lets also create a context so that we can absolutely ensure that the command should die when we're done
    29  	ctx, ctxCancel := context.WithCancel(ctx)
    30  
    31  	cancel := func() {
    32  		ctxCancel()
    33  		_ = stdoutReader.Close()
    34  		_ = stdoutWriter.Close()
    35  	}
    36  
    37  	args := make([]string, 0, 8+len(paths))
    38  	args = append(args, "log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z", head, "--")
    39  	if len(paths) < 70 {
    40  		if treepath != "" {
    41  			args = append(args, treepath)
    42  			for _, pth := range paths {
    43  				if pth != "" {
    44  					args = append(args, path.Join(treepath, pth))
    45  				}
    46  			}
    47  		} else {
    48  			for _, pth := range paths {
    49  				if pth != "" {
    50  					args = append(args, pth)
    51  				}
    52  			}
    53  		}
    54  	} else if treepath != "" {
    55  		args = append(args, treepath)
    56  	}
    57  
    58  	go func() {
    59  		stderr := strings.Builder{}
    60  		err := NewCommand(ctx, args...).Run(&RunOpts{
    61  			Dir:    repository,
    62  			Stdout: stdoutWriter,
    63  			Stderr: &stderr,
    64  		})
    65  		if err != nil {
    66  			_ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
    67  			return
    68  		}
    69  
    70  		_ = stdoutWriter.Close()
    71  	}()
    72  
    73  	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
    74  	bufReader := bufio.NewReaderSize(stdoutReader, 32*1024)
    75  
    76  	return bufReader, cancel
    77  }
    78  
    79  // LogNameStatusRepoParser parses a git log raw output from LogRawRepo
    80  type LogNameStatusRepoParser struct {
    81  	treepath string
    82  	paths    []string
    83  	next     []byte
    84  	buffull  bool
    85  	rd       *bufio.Reader
    86  	cancel   func()
    87  }
    88  
    89  // NewLogNameStatusRepoParser returns a new parser for a git log raw output
    90  func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
    91  	rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...)
    92  	return &LogNameStatusRepoParser{
    93  		treepath: treepath,
    94  		paths:    paths,
    95  		rd:       rd,
    96  		cancel:   cancel,
    97  	}
    98  }
    99  
   100  // LogNameStatusCommitData represents a commit artefact from git log raw
   101  type LogNameStatusCommitData struct {
   102  	CommitID  string
   103  	ParentIDs []string
   104  	Paths     []bool
   105  }
   106  
   107  // Next returns the next LogStatusCommitData
   108  func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
   109  	var err error
   110  	if g.next == nil || len(g.next) == 0 {
   111  		g.buffull = false
   112  		g.next, err = g.rd.ReadSlice('\x00')
   113  		if err != nil {
   114  			if err == bufio.ErrBufferFull {
   115  				g.buffull = true
   116  			} else if err == io.EOF {
   117  				return nil, nil
   118  			} else {
   119  				return nil, err
   120  			}
   121  		}
   122  	}
   123  
   124  	ret := LogNameStatusCommitData{}
   125  	if bytes.Equal(g.next, []byte("commit\000")) {
   126  		g.next, err = g.rd.ReadSlice('\x00')
   127  		if err != nil {
   128  			if err == bufio.ErrBufferFull {
   129  				g.buffull = true
   130  			} else if err == io.EOF {
   131  				return nil, nil
   132  			} else {
   133  				return nil, err
   134  			}
   135  		}
   136  	}
   137  
   138  	// Our "line" must look like: <commitid> SP (<parent> SP) * NUL
   139  	ret.CommitID = string(g.next[0:40])
   140  	parents := string(g.next[41:])
   141  	if g.buffull {
   142  		more, err := g.rd.ReadString('\x00')
   143  		if err != nil {
   144  			return nil, err
   145  		}
   146  		parents += more
   147  	}
   148  	parents = parents[:len(parents)-1]
   149  	ret.ParentIDs = strings.Split(parents, " ")
   150  
   151  	// now read the next "line"
   152  	g.buffull = false
   153  	g.next, err = g.rd.ReadSlice('\x00')
   154  	if err != nil {
   155  		if err == bufio.ErrBufferFull {
   156  			g.buffull = true
   157  		} else if err != io.EOF {
   158  			return nil, err
   159  		}
   160  	}
   161  
   162  	if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') {
   163  		return &ret, nil
   164  	}
   165  
   166  	// Ok we have some changes.
   167  	// This line will look like: NL <fname> NUL
   168  	//
   169  	// Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too.
   170  	if g.next[0] == '\n' {
   171  		g.next = g.next[1:]
   172  	} else {
   173  		g.buffull = false
   174  		g.next, err = g.rd.ReadSlice('\x00')
   175  		if err != nil {
   176  			if err == bufio.ErrBufferFull {
   177  				g.buffull = true
   178  			} else if err != io.EOF {
   179  				return nil, err
   180  			}
   181  		}
   182  		if len(g.next) == 0 {
   183  			return &ret, nil
   184  		}
   185  		if g.next[0] == '\x00' {
   186  			g.buffull = false
   187  			g.next, err = g.rd.ReadSlice('\x00')
   188  			if err != nil {
   189  				if err == bufio.ErrBufferFull {
   190  					g.buffull = true
   191  				} else if err != io.EOF {
   192  					return nil, err
   193  				}
   194  			}
   195  		}
   196  	}
   197  
   198  	fnameBuf := make([]byte, 4096)
   199  
   200  diffloop:
   201  	for {
   202  		if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
   203  			return &ret, nil
   204  		}
   205  		g.next, err = g.rd.ReadSlice('\x00')
   206  		if err != nil {
   207  			if err == bufio.ErrBufferFull {
   208  				g.buffull = true
   209  			} else if err == io.EOF {
   210  				return &ret, nil
   211  			} else {
   212  				return nil, err
   213  			}
   214  		}
   215  		copy(fnameBuf, g.next)
   216  		if len(fnameBuf) < len(g.next) {
   217  			fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
   218  		} else {
   219  			fnameBuf = fnameBuf[:len(g.next)]
   220  		}
   221  		if err != nil {
   222  			if err != bufio.ErrBufferFull {
   223  				return nil, err
   224  			}
   225  			more, err := g.rd.ReadBytes('\x00')
   226  			if err != nil {
   227  				return nil, err
   228  			}
   229  			fnameBuf = append(fnameBuf, more...)
   230  		}
   231  
   232  		// read the next line
   233  		g.buffull = false
   234  		g.next, err = g.rd.ReadSlice('\x00')
   235  		if err != nil {
   236  			if err == bufio.ErrBufferFull {
   237  				g.buffull = true
   238  			} else if err != io.EOF {
   239  				return nil, err
   240  			}
   241  		}
   242  
   243  		if treepath != "" {
   244  			if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
   245  				fnameBuf = fnameBuf[:cap(fnameBuf)]
   246  				continue diffloop
   247  			}
   248  		}
   249  		fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
   250  		if len(fnameBuf) > maxpathlen {
   251  			fnameBuf = fnameBuf[:cap(fnameBuf)]
   252  			continue diffloop
   253  		}
   254  		if len(fnameBuf) > 0 {
   255  			if len(treepath) > 0 {
   256  				if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
   257  					fnameBuf = fnameBuf[:cap(fnameBuf)]
   258  					continue diffloop
   259  				}
   260  				fnameBuf = fnameBuf[1:]
   261  			} else if bytes.IndexByte(fnameBuf, '/') >= 0 {
   262  				fnameBuf = fnameBuf[:cap(fnameBuf)]
   263  				continue diffloop
   264  			}
   265  		}
   266  
   267  		idx, ok := paths2ids[string(fnameBuf)]
   268  		if !ok {
   269  			fnameBuf = fnameBuf[:cap(fnameBuf)]
   270  			continue diffloop
   271  		}
   272  		if ret.Paths == nil {
   273  			ret.Paths = changed
   274  		}
   275  		changed[idx] = true
   276  	}
   277  }
   278  
   279  // Close closes the parser
   280  func (g *LogNameStatusRepoParser) Close() {
   281  	g.cancel()
   282  }
   283  
   284  // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files
   285  func WalkGitLog(ctx context.Context, cache *LastCommitCache, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
   286  	headRef := head.ID.String()
   287  
   288  	tree, err := head.SubTree(treepath)
   289  	if err != nil {
   290  		return nil, err
   291  	}
   292  
   293  	entries, err := tree.ListEntries()
   294  	if err != nil {
   295  		return nil, err
   296  	}
   297  
   298  	if len(paths) == 0 {
   299  		paths = make([]string, 0, len(entries)+1)
   300  		paths = append(paths, "")
   301  		for _, entry := range entries {
   302  			paths = append(paths, entry.Name())
   303  		}
   304  	} else {
   305  		sort.Strings(paths)
   306  		if paths[0] != "" {
   307  			paths = append([]string{""}, paths...)
   308  		}
   309  		// remove duplicates
   310  		for i := len(paths) - 1; i > 0; i-- {
   311  			if paths[i] == paths[i-1] {
   312  				paths = append(paths[:i-1], paths[i:]...)
   313  			}
   314  		}
   315  	}
   316  
   317  	path2idx := map[string]int{}
   318  	maxpathlen := len(treepath)
   319  
   320  	for i := range paths {
   321  		path2idx[paths[i]] = i
   322  		pthlen := len(paths[i]) + len(treepath) + 1
   323  		if pthlen > maxpathlen {
   324  			maxpathlen = pthlen
   325  		}
   326  	}
   327  
   328  	g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...)
   329  	// don't use defer g.Close() here as g may change its value - instead wrap in a func
   330  	defer func() {
   331  		g.Close()
   332  	}()
   333  
   334  	results := make([]string, len(paths))
   335  	remaining := len(paths)
   336  	nextRestart := (len(paths) * 3) / 4
   337  	if nextRestart > 70 {
   338  		nextRestart = 70
   339  	}
   340  	lastEmptyParent := head.ID.String()
   341  	commitSinceLastEmptyParent := uint64(0)
   342  	commitSinceNextRestart := uint64(0)
   343  	parentRemaining := map[string]bool{}
   344  
   345  	changed := make([]bool, len(paths))
   346  
   347  heaploop:
   348  	for {
   349  		select {
   350  		case <-ctx.Done():
   351  			if ctx.Err() == context.DeadlineExceeded {
   352  				break heaploop
   353  			}
   354  			g.Close()
   355  			return nil, ctx.Err()
   356  		default:
   357  		}
   358  		current, err := g.Next(treepath, path2idx, changed, maxpathlen)
   359  		if err != nil {
   360  			if errors.Is(err, context.DeadlineExceeded) {
   361  				break heaploop
   362  			}
   363  			g.Close()
   364  			return nil, err
   365  		}
   366  		if current == nil {
   367  			break heaploop
   368  		}
   369  		delete(parentRemaining, current.CommitID)
   370  		if current.Paths != nil {
   371  			for i, found := range current.Paths {
   372  				if !found {
   373  					continue
   374  				}
   375  				changed[i] = false
   376  				if results[i] == "" {
   377  					results[i] = current.CommitID
   378  					if err := cache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
   379  						return nil, err
   380  					}
   381  					delete(path2idx, paths[i])
   382  					remaining--
   383  					if results[0] == "" {
   384  						results[0] = current.CommitID
   385  						if err := cache.Put(headRef, treepath, current.CommitID); err != nil {
   386  							return nil, err
   387  						}
   388  						delete(path2idx, "")
   389  						remaining--
   390  					}
   391  				}
   392  			}
   393  		}
   394  
   395  		if remaining <= 0 {
   396  			break heaploop
   397  		}
   398  		commitSinceLastEmptyParent++
   399  		if len(parentRemaining) == 0 {
   400  			lastEmptyParent = current.CommitID
   401  			commitSinceLastEmptyParent = 0
   402  		}
   403  		if remaining <= nextRestart {
   404  			commitSinceNextRestart++
   405  			if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
   406  				g.Close()
   407  				remainingPaths := make([]string, 0, len(paths))
   408  				for i, pth := range paths {
   409  					if results[i] == "" {
   410  						remainingPaths = append(remainingPaths, pth)
   411  					}
   412  				}
   413  				g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
   414  				parentRemaining = map[string]bool{}
   415  				nextRestart = (remaining * 3) / 4
   416  				continue heaploop
   417  			}
   418  		}
   419  		for _, parent := range current.ParentIDs {
   420  			parentRemaining[parent] = true
   421  		}
   422  	}
   423  	g.Close()
   424  
   425  	resultsMap := map[string]string{}
   426  	for i, pth := range paths {
   427  		resultsMap[pth] = results[i]
   428  	}
   429  
   430  	return resultsMap, nil
   431  }