code.gitea.io/gitea@v1.22.3/modules/git/log_name_status.go (about)

     1  // Copyright 2021 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package git
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"context"
    10  	"errors"
    11  	"io"
    12  	"path"
    13  	"sort"
    14  	"strings"
    15  
    16  	"code.gitea.io/gitea/modules/container"
    17  
    18  	"github.com/djherbis/buffer"
    19  	"github.com/djherbis/nio/v3"
    20  )
    21  
    22  // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    23  func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
    24  	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
    25  	// so let's create a batch stdin and stdout
    26  	stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024))
    27  
    28  	// Lets also create a context so that we can absolutely ensure that the command should die when we're done
    29  	ctx, ctxCancel := context.WithCancel(ctx)
    30  
    31  	cancel := func() {
    32  		ctxCancel()
    33  		_ = stdoutReader.Close()
    34  		_ = stdoutWriter.Close()
    35  	}
    36  
    37  	cmd := NewCommand(ctx)
    38  	cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head)
    39  
    40  	var files []string
    41  	if len(paths) < 70 {
    42  		if treepath != "" {
    43  			files = append(files, treepath)
    44  			for _, pth := range paths {
    45  				if pth != "" {
    46  					files = append(files, path.Join(treepath, pth))
    47  				}
    48  			}
    49  		} else {
    50  			for _, pth := range paths {
    51  				if pth != "" {
    52  					files = append(files, pth)
    53  				}
    54  			}
    55  		}
    56  	} else if treepath != "" {
    57  		files = append(files, treepath)
    58  	}
    59  	// Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg"
    60  	for i, file := range files {
    61  		files[i] = ":(literal)" + file
    62  	}
    63  	cmd.AddDashesAndList(files...)
    64  
    65  	go func() {
    66  		stderr := strings.Builder{}
    67  		err := cmd.Run(&RunOpts{
    68  			Dir:    repository,
    69  			Stdout: stdoutWriter,
    70  			Stderr: &stderr,
    71  		})
    72  		if err != nil {
    73  			_ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
    74  			return
    75  		}
    76  
    77  		_ = stdoutWriter.Close()
    78  	}()
    79  
    80  	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
    81  	bufReader := bufio.NewReaderSize(stdoutReader, 32*1024)
    82  
    83  	return bufReader, cancel
    84  }
    85  
// LogNameStatusRepoParser parses the `git log --name-status` output produced by LogNameStatusRepo
type LogNameStatusRepoParser struct {
	// treepath is the treepath the parser was created with
	treepath string
	// paths are the paths the parser was created with
	paths    []string
	// next holds the token most recently read from rd that has not been consumed yet
	next     []byte
	// buffull records that the read which produced next ended with bufio.ErrBufferFull,
	// i.e. next is only the first part of a token
	buffull  bool
	// rd is the buffered reader over the git log output
	rd       *bufio.Reader
	// cancel is invoked by Close
	cancel   func()
}
    95  
    96  // NewLogNameStatusRepoParser returns a new parser for a git log raw output
    97  func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
    98  	rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...)
    99  	return &LogNameStatusRepoParser{
   100  		treepath: treepath,
   101  		paths:    paths,
   102  		rd:       rd,
   103  		cancel:   cancel,
   104  	}
   105  }
   106  
// LogNameStatusCommitData represents a single commit entry parsed from the git log --name-status output
type LogNameStatusCommitData struct {
	// CommitID is the first space separated field of the commit line
	CommitID  string
	// ParentIDs are the remaining space separated fields of the commit line
	ParentIDs []string
	// Paths is nil when no requested path was matched for this commit; otherwise it is the
	// "changed" slice handed to Next, in which the entries of the matched paths are set to true
	Paths     []bool
}
   113  
   114  // Next returns the next LogStatusCommitData
   115  func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
   116  	var err error
   117  	if g.next == nil || len(g.next) == 0 {
   118  		g.buffull = false
   119  		g.next, err = g.rd.ReadSlice('\x00')
   120  		if err != nil {
   121  			if err == bufio.ErrBufferFull {
   122  				g.buffull = true
   123  			} else if err == io.EOF {
   124  				return nil, nil
   125  			} else {
   126  				return nil, err
   127  			}
   128  		}
   129  	}
   130  
   131  	ret := LogNameStatusCommitData{}
   132  	if bytes.Equal(g.next, []byte("commit\000")) {
   133  		g.next, err = g.rd.ReadSlice('\x00')
   134  		if err != nil {
   135  			if err == bufio.ErrBufferFull {
   136  				g.buffull = true
   137  			} else if err == io.EOF {
   138  				return nil, nil
   139  			} else {
   140  				return nil, err
   141  			}
   142  		}
   143  	}
   144  
   145  	// Our "line" must look like: <commitid> SP (<parent> SP) * NUL
   146  	commitIDs := string(g.next)
   147  	if g.buffull {
   148  		more, err := g.rd.ReadString('\x00')
   149  		if err != nil {
   150  			return nil, err
   151  		}
   152  		commitIDs += more
   153  	}
   154  	commitIDs = commitIDs[:len(commitIDs)-1]
   155  	splitIDs := strings.Split(commitIDs, " ")
   156  	ret.CommitID = splitIDs[0]
   157  	if len(splitIDs) > 1 {
   158  		ret.ParentIDs = splitIDs[1:]
   159  	}
   160  
   161  	// now read the next "line"
   162  	g.buffull = false
   163  	g.next, err = g.rd.ReadSlice('\x00')
   164  	if err != nil {
   165  		if err == bufio.ErrBufferFull {
   166  			g.buffull = true
   167  		} else if err != io.EOF {
   168  			return nil, err
   169  		}
   170  	}
   171  
   172  	if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') {
   173  		return &ret, nil
   174  	}
   175  
   176  	// Ok we have some changes.
   177  	// This line will look like: NL <fname> NUL
   178  	//
   179  	// Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too.
   180  	if g.next[0] == '\n' {
   181  		g.next = g.next[1:]
   182  	} else {
   183  		g.buffull = false
   184  		g.next, err = g.rd.ReadSlice('\x00')
   185  		if err != nil {
   186  			if err == bufio.ErrBufferFull {
   187  				g.buffull = true
   188  			} else if err != io.EOF {
   189  				return nil, err
   190  			}
   191  		}
   192  		if len(g.next) == 0 {
   193  			return &ret, nil
   194  		}
   195  		if g.next[0] == '\x00' {
   196  			g.buffull = false
   197  			g.next, err = g.rd.ReadSlice('\x00')
   198  			if err != nil {
   199  				if err == bufio.ErrBufferFull {
   200  					g.buffull = true
   201  				} else if err != io.EOF {
   202  					return nil, err
   203  				}
   204  			}
   205  		}
   206  	}
   207  
   208  	fnameBuf := make([]byte, 4096)
   209  
   210  diffloop:
   211  	for {
   212  		if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
   213  			return &ret, nil
   214  		}
   215  		g.next, err = g.rd.ReadSlice('\x00')
   216  		if err != nil {
   217  			if err == bufio.ErrBufferFull {
   218  				g.buffull = true
   219  			} else if err == io.EOF {
   220  				return &ret, nil
   221  			} else {
   222  				return nil, err
   223  			}
   224  		}
   225  		copy(fnameBuf, g.next)
   226  		if len(fnameBuf) < len(g.next) {
   227  			fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
   228  		} else {
   229  			fnameBuf = fnameBuf[:len(g.next)]
   230  		}
   231  		if err != nil {
   232  			if err != bufio.ErrBufferFull {
   233  				return nil, err
   234  			}
   235  			more, err := g.rd.ReadBytes('\x00')
   236  			if err != nil {
   237  				return nil, err
   238  			}
   239  			fnameBuf = append(fnameBuf, more...)
   240  		}
   241  
   242  		// read the next line
   243  		g.buffull = false
   244  		g.next, err = g.rd.ReadSlice('\x00')
   245  		if err != nil {
   246  			if err == bufio.ErrBufferFull {
   247  				g.buffull = true
   248  			} else if err != io.EOF {
   249  				return nil, err
   250  			}
   251  		}
   252  
   253  		if treepath != "" {
   254  			if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
   255  				fnameBuf = fnameBuf[:cap(fnameBuf)]
   256  				continue diffloop
   257  			}
   258  		}
   259  		fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
   260  		if len(fnameBuf) > maxpathlen {
   261  			fnameBuf = fnameBuf[:cap(fnameBuf)]
   262  			continue diffloop
   263  		}
   264  		if len(fnameBuf) > 0 {
   265  			if len(treepath) > 0 {
   266  				if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
   267  					fnameBuf = fnameBuf[:cap(fnameBuf)]
   268  					continue diffloop
   269  				}
   270  				fnameBuf = fnameBuf[1:]
   271  			} else if bytes.IndexByte(fnameBuf, '/') >= 0 {
   272  				fnameBuf = fnameBuf[:cap(fnameBuf)]
   273  				continue diffloop
   274  			}
   275  		}
   276  
   277  		idx, ok := paths2ids[string(fnameBuf)]
   278  		if !ok {
   279  			fnameBuf = fnameBuf[:cap(fnameBuf)]
   280  			continue diffloop
   281  		}
   282  		if ret.Paths == nil {
   283  			ret.Paths = changed
   284  		}
   285  		changed[idx] = true
   286  	}
   287  }
   288  
// Close closes the parser by invoking the cancel function it was created with
func (g *LogNameStatusRepoParser) Close() {
	g.cancel()
}
   293  
   294  // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files
   295  func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
   296  	headRef := head.ID.String()
   297  
   298  	tree, err := head.SubTree(treepath)
   299  	if err != nil {
   300  		return nil, err
   301  	}
   302  
   303  	entries, err := tree.ListEntries()
   304  	if err != nil {
   305  		return nil, err
   306  	}
   307  
   308  	if len(paths) == 0 {
   309  		paths = make([]string, 0, len(entries)+1)
   310  		paths = append(paths, "")
   311  		for _, entry := range entries {
   312  			paths = append(paths, entry.Name())
   313  		}
   314  	} else {
   315  		sort.Strings(paths)
   316  		if paths[0] != "" {
   317  			paths = append([]string{""}, paths...)
   318  		}
   319  		// remove duplicates
   320  		for i := len(paths) - 1; i > 0; i-- {
   321  			if paths[i] == paths[i-1] {
   322  				paths = append(paths[:i-1], paths[i:]...)
   323  			}
   324  		}
   325  	}
   326  
   327  	path2idx := map[string]int{}
   328  	maxpathlen := len(treepath)
   329  
   330  	for i := range paths {
   331  		path2idx[paths[i]] = i
   332  		pthlen := len(paths[i]) + len(treepath) + 1
   333  		if pthlen > maxpathlen {
   334  			maxpathlen = pthlen
   335  		}
   336  	}
   337  
   338  	g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...)
   339  	// don't use defer g.Close() here as g may change its value - instead wrap in a func
   340  	defer func() {
   341  		g.Close()
   342  	}()
   343  
   344  	results := make([]string, len(paths))
   345  	remaining := len(paths)
   346  	nextRestart := (len(paths) * 3) / 4
   347  	if nextRestart > 70 {
   348  		nextRestart = 70
   349  	}
   350  	lastEmptyParent := head.ID.String()
   351  	commitSinceLastEmptyParent := uint64(0)
   352  	commitSinceNextRestart := uint64(0)
   353  	parentRemaining := make(container.Set[string])
   354  
   355  	changed := make([]bool, len(paths))
   356  
   357  heaploop:
   358  	for {
   359  		select {
   360  		case <-ctx.Done():
   361  			if ctx.Err() == context.DeadlineExceeded {
   362  				break heaploop
   363  			}
   364  			g.Close()
   365  			return nil, ctx.Err()
   366  		default:
   367  		}
   368  		current, err := g.Next(treepath, path2idx, changed, maxpathlen)
   369  		if err != nil {
   370  			if errors.Is(err, context.DeadlineExceeded) {
   371  				break heaploop
   372  			}
   373  			g.Close()
   374  			return nil, err
   375  		}
   376  		if current == nil {
   377  			break heaploop
   378  		}
   379  		parentRemaining.Remove(current.CommitID)
   380  		for i, found := range current.Paths {
   381  			if !found {
   382  				continue
   383  			}
   384  			changed[i] = false
   385  			if results[i] == "" {
   386  				results[i] = current.CommitID
   387  				if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
   388  					return nil, err
   389  				}
   390  				delete(path2idx, paths[i])
   391  				remaining--
   392  				if results[0] == "" {
   393  					results[0] = current.CommitID
   394  					if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil {
   395  						return nil, err
   396  					}
   397  					delete(path2idx, "")
   398  					remaining--
   399  				}
   400  			}
   401  		}
   402  
   403  		if remaining <= 0 {
   404  			break heaploop
   405  		}
   406  		commitSinceLastEmptyParent++
   407  		if len(parentRemaining) == 0 {
   408  			lastEmptyParent = current.CommitID
   409  			commitSinceLastEmptyParent = 0
   410  		}
   411  		if remaining <= nextRestart {
   412  			commitSinceNextRestart++
   413  			if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
   414  				g.Close()
   415  				remainingPaths := make([]string, 0, len(paths))
   416  				for i, pth := range paths {
   417  					if results[i] == "" {
   418  						remainingPaths = append(remainingPaths, pth)
   419  					}
   420  				}
   421  				g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
   422  				parentRemaining = make(container.Set[string])
   423  				nextRestart = (remaining * 3) / 4
   424  				continue heaploop
   425  			}
   426  		}
   427  		parentRemaining.AddMultiple(current.ParentIDs...)
   428  	}
   429  	g.Close()
   430  
   431  	resultsMap := map[string]string{}
   432  	for i, pth := range paths {
   433  		resultsMap[pth] = results[i]
   434  	}
   435  
   436  	return resultsMap, nil
   437  }