code.gitea.io/gitea@v1.19.3/modules/git/log_name_status.go (about)

     1  // Copyright 2021 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package git
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"context"
    10  	"errors"
    11  	"io"
    12  	"path"
    13  	"sort"
    14  	"strings"
    15  
    16  	"code.gitea.io/gitea/modules/container"
    17  
    18  	"github.com/djherbis/buffer"
    19  	"github.com/djherbis/nio/v3"
    20  )
    21  
    22  // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    23  func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
    24  	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
    25  	// so let's create a batch stdin and stdout
    26  	stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024))
    27  
    28  	// Lets also create a context so that we can absolutely ensure that the command should die when we're done
    29  	ctx, ctxCancel := context.WithCancel(ctx)
    30  
    31  	cancel := func() {
    32  		ctxCancel()
    33  		_ = stdoutReader.Close()
    34  		_ = stdoutWriter.Close()
    35  	}
    36  
    37  	cmd := NewCommand(ctx)
    38  	cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head)
    39  
    40  	var files []string
    41  	if len(paths) < 70 {
    42  		if treepath != "" {
    43  			files = append(files, treepath)
    44  			for _, pth := range paths {
    45  				if pth != "" {
    46  					files = append(files, path.Join(treepath, pth))
    47  				}
    48  			}
    49  		} else {
    50  			for _, pth := range paths {
    51  				if pth != "" {
    52  					files = append(files, pth)
    53  				}
    54  			}
    55  		}
    56  	} else if treepath != "" {
    57  		files = append(files, treepath)
    58  	}
    59  	cmd.AddDashesAndList(files...)
    60  
    61  	go func() {
    62  		stderr := strings.Builder{}
    63  		err := cmd.Run(&RunOpts{
    64  			Dir:    repository,
    65  			Stdout: stdoutWriter,
    66  			Stderr: &stderr,
    67  		})
    68  		if err != nil {
    69  			_ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
    70  			return
    71  		}
    72  
    73  		_ = stdoutWriter.Close()
    74  	}()
    75  
    76  	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
    77  	bufReader := bufio.NewReaderSize(stdoutReader, 32*1024)
    78  
    79  	return bufReader, cancel
    80  }
    81  
// LogNameStatusRepoParser parses the git log --name-status output produced by LogNameStatusRepo
type LogNameStatusRepoParser struct {
	// treepath is the tree path handed to NewLogNameStatusRepoParser
	treepath string
	// paths are the paths handed to NewLogNameStatusRepoParser
	paths []string
	// next is the most recently read NUL-delimited chunk; it is carried over between calls to Next.
	// It is obtained from rd.ReadSlice and is therefore only valid until the next read from rd.
	next []byte
	// buffull records that the read which filled next stopped with bufio.ErrBufferFull, i.e. before a NUL was found
	buffull bool
	// rd is the buffered reader over the git log output
	rd *bufio.Reader
	// cancel is invoked by Close
	cancel func()
}
    91  
    92  // NewLogNameStatusRepoParser returns a new parser for a git log raw output
    93  func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
    94  	rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...)
    95  	return &LogNameStatusRepoParser{
    96  		treepath: treepath,
    97  		paths:    paths,
    98  		rd:       rd,
    99  		cancel:   cancel,
   100  	}
   101  }
   102  
// LogNameStatusCommitData represents a single commit parsed from the git log --name-status output
type LogNameStatusCommitData struct {
	// CommitID is the ID of the commit as printed by git log
	CommitID string
	// ParentIDs are the space separated parent IDs printed after the commit ID
	ParentIDs []string
	// Paths is nil when the parser matched none of the tracked paths for this commit; otherwise it is the
	// `changed` slice that was passed to Next, with true at the index of every matched path
	Paths []bool
}
   109  
   110  // Next returns the next LogStatusCommitData
   111  func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
   112  	var err error
   113  	if g.next == nil || len(g.next) == 0 {
   114  		g.buffull = false
   115  		g.next, err = g.rd.ReadSlice('\x00')
   116  		if err != nil {
   117  			if err == bufio.ErrBufferFull {
   118  				g.buffull = true
   119  			} else if err == io.EOF {
   120  				return nil, nil
   121  			} else {
   122  				return nil, err
   123  			}
   124  		}
   125  	}
   126  
   127  	ret := LogNameStatusCommitData{}
   128  	if bytes.Equal(g.next, []byte("commit\000")) {
   129  		g.next, err = g.rd.ReadSlice('\x00')
   130  		if err != nil {
   131  			if err == bufio.ErrBufferFull {
   132  				g.buffull = true
   133  			} else if err == io.EOF {
   134  				return nil, nil
   135  			} else {
   136  				return nil, err
   137  			}
   138  		}
   139  	}
   140  
   141  	// Our "line" must look like: <commitid> SP (<parent> SP) * NUL
   142  	ret.CommitID = string(g.next[0:40])
   143  	parents := string(g.next[41:])
   144  	if g.buffull {
   145  		more, err := g.rd.ReadString('\x00')
   146  		if err != nil {
   147  			return nil, err
   148  		}
   149  		parents += more
   150  	}
   151  	parents = parents[:len(parents)-1]
   152  	ret.ParentIDs = strings.Split(parents, " ")
   153  
   154  	// now read the next "line"
   155  	g.buffull = false
   156  	g.next, err = g.rd.ReadSlice('\x00')
   157  	if err != nil {
   158  		if err == bufio.ErrBufferFull {
   159  			g.buffull = true
   160  		} else if err != io.EOF {
   161  			return nil, err
   162  		}
   163  	}
   164  
   165  	if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') {
   166  		return &ret, nil
   167  	}
   168  
   169  	// Ok we have some changes.
   170  	// This line will look like: NL <fname> NUL
   171  	//
   172  	// Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too.
   173  	if g.next[0] == '\n' {
   174  		g.next = g.next[1:]
   175  	} else {
   176  		g.buffull = false
   177  		g.next, err = g.rd.ReadSlice('\x00')
   178  		if err != nil {
   179  			if err == bufio.ErrBufferFull {
   180  				g.buffull = true
   181  			} else if err != io.EOF {
   182  				return nil, err
   183  			}
   184  		}
   185  		if len(g.next) == 0 {
   186  			return &ret, nil
   187  		}
   188  		if g.next[0] == '\x00' {
   189  			g.buffull = false
   190  			g.next, err = g.rd.ReadSlice('\x00')
   191  			if err != nil {
   192  				if err == bufio.ErrBufferFull {
   193  					g.buffull = true
   194  				} else if err != io.EOF {
   195  					return nil, err
   196  				}
   197  			}
   198  		}
   199  	}
   200  
   201  	fnameBuf := make([]byte, 4096)
   202  
   203  diffloop:
   204  	for {
   205  		if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
   206  			return &ret, nil
   207  		}
   208  		g.next, err = g.rd.ReadSlice('\x00')
   209  		if err != nil {
   210  			if err == bufio.ErrBufferFull {
   211  				g.buffull = true
   212  			} else if err == io.EOF {
   213  				return &ret, nil
   214  			} else {
   215  				return nil, err
   216  			}
   217  		}
   218  		copy(fnameBuf, g.next)
   219  		if len(fnameBuf) < len(g.next) {
   220  			fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
   221  		} else {
   222  			fnameBuf = fnameBuf[:len(g.next)]
   223  		}
   224  		if err != nil {
   225  			if err != bufio.ErrBufferFull {
   226  				return nil, err
   227  			}
   228  			more, err := g.rd.ReadBytes('\x00')
   229  			if err != nil {
   230  				return nil, err
   231  			}
   232  			fnameBuf = append(fnameBuf, more...)
   233  		}
   234  
   235  		// read the next line
   236  		g.buffull = false
   237  		g.next, err = g.rd.ReadSlice('\x00')
   238  		if err != nil {
   239  			if err == bufio.ErrBufferFull {
   240  				g.buffull = true
   241  			} else if err != io.EOF {
   242  				return nil, err
   243  			}
   244  		}
   245  
   246  		if treepath != "" {
   247  			if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
   248  				fnameBuf = fnameBuf[:cap(fnameBuf)]
   249  				continue diffloop
   250  			}
   251  		}
   252  		fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
   253  		if len(fnameBuf) > maxpathlen {
   254  			fnameBuf = fnameBuf[:cap(fnameBuf)]
   255  			continue diffloop
   256  		}
   257  		if len(fnameBuf) > 0 {
   258  			if len(treepath) > 0 {
   259  				if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
   260  					fnameBuf = fnameBuf[:cap(fnameBuf)]
   261  					continue diffloop
   262  				}
   263  				fnameBuf = fnameBuf[1:]
   264  			} else if bytes.IndexByte(fnameBuf, '/') >= 0 {
   265  				fnameBuf = fnameBuf[:cap(fnameBuf)]
   266  				continue diffloop
   267  			}
   268  		}
   269  
   270  		idx, ok := paths2ids[string(fnameBuf)]
   271  		if !ok {
   272  			fnameBuf = fnameBuf[:cap(fnameBuf)]
   273  			continue diffloop
   274  		}
   275  		if ret.Paths == nil {
   276  			ret.Paths = changed
   277  		}
   278  		changed[idx] = true
   279  	}
   280  }
   281  
// Close closes the parser by invoking the cancel function it was created with
func (g *LogNameStatusRepoParser) Close() {
	g.cancel()
}
   286  
   287  // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files
   288  func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
   289  	headRef := head.ID.String()
   290  
   291  	tree, err := head.SubTree(treepath)
   292  	if err != nil {
   293  		return nil, err
   294  	}
   295  
   296  	entries, err := tree.ListEntries()
   297  	if err != nil {
   298  		return nil, err
   299  	}
   300  
   301  	if len(paths) == 0 {
   302  		paths = make([]string, 0, len(entries)+1)
   303  		paths = append(paths, "")
   304  		for _, entry := range entries {
   305  			paths = append(paths, entry.Name())
   306  		}
   307  	} else {
   308  		sort.Strings(paths)
   309  		if paths[0] != "" {
   310  			paths = append([]string{""}, paths...)
   311  		}
   312  		// remove duplicates
   313  		for i := len(paths) - 1; i > 0; i-- {
   314  			if paths[i] == paths[i-1] {
   315  				paths = append(paths[:i-1], paths[i:]...)
   316  			}
   317  		}
   318  	}
   319  
   320  	path2idx := map[string]int{}
   321  	maxpathlen := len(treepath)
   322  
   323  	for i := range paths {
   324  		path2idx[paths[i]] = i
   325  		pthlen := len(paths[i]) + len(treepath) + 1
   326  		if pthlen > maxpathlen {
   327  			maxpathlen = pthlen
   328  		}
   329  	}
   330  
   331  	g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...)
   332  	// don't use defer g.Close() here as g may change its value - instead wrap in a func
   333  	defer func() {
   334  		g.Close()
   335  	}()
   336  
   337  	results := make([]string, len(paths))
   338  	remaining := len(paths)
   339  	nextRestart := (len(paths) * 3) / 4
   340  	if nextRestart > 70 {
   341  		nextRestart = 70
   342  	}
   343  	lastEmptyParent := head.ID.String()
   344  	commitSinceLastEmptyParent := uint64(0)
   345  	commitSinceNextRestart := uint64(0)
   346  	parentRemaining := make(container.Set[string])
   347  
   348  	changed := make([]bool, len(paths))
   349  
   350  heaploop:
   351  	for {
   352  		select {
   353  		case <-ctx.Done():
   354  			if ctx.Err() == context.DeadlineExceeded {
   355  				break heaploop
   356  			}
   357  			g.Close()
   358  			return nil, ctx.Err()
   359  		default:
   360  		}
   361  		current, err := g.Next(treepath, path2idx, changed, maxpathlen)
   362  		if err != nil {
   363  			if errors.Is(err, context.DeadlineExceeded) {
   364  				break heaploop
   365  			}
   366  			g.Close()
   367  			return nil, err
   368  		}
   369  		if current == nil {
   370  			break heaploop
   371  		}
   372  		parentRemaining.Remove(current.CommitID)
   373  		if current.Paths != nil {
   374  			for i, found := range current.Paths {
   375  				if !found {
   376  					continue
   377  				}
   378  				changed[i] = false
   379  				if results[i] == "" {
   380  					results[i] = current.CommitID
   381  					if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
   382  						return nil, err
   383  					}
   384  					delete(path2idx, paths[i])
   385  					remaining--
   386  					if results[0] == "" {
   387  						results[0] = current.CommitID
   388  						if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil {
   389  							return nil, err
   390  						}
   391  						delete(path2idx, "")
   392  						remaining--
   393  					}
   394  				}
   395  			}
   396  		}
   397  
   398  		if remaining <= 0 {
   399  			break heaploop
   400  		}
   401  		commitSinceLastEmptyParent++
   402  		if len(parentRemaining) == 0 {
   403  			lastEmptyParent = current.CommitID
   404  			commitSinceLastEmptyParent = 0
   405  		}
   406  		if remaining <= nextRestart {
   407  			commitSinceNextRestart++
   408  			if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
   409  				g.Close()
   410  				remainingPaths := make([]string, 0, len(paths))
   411  				for i, pth := range paths {
   412  					if results[i] == "" {
   413  						remainingPaths = append(remainingPaths, pth)
   414  					}
   415  				}
   416  				g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
   417  				parentRemaining = make(container.Set[string])
   418  				nextRestart = (remaining * 3) / 4
   419  				continue heaploop
   420  			}
   421  		}
   422  		parentRemaining.AddMultiple(current.ParentIDs...)
   423  	}
   424  	g.Close()
   425  
   426  	resultsMap := map[string]string{}
   427  	for i, pth := range paths {
   428  		resultsMap[pth] = results[i]
   429  	}
   430  
   431  	return resultsMap, nil
   432  }