code.gitea.io/gitea@v1.22.3/modules/indexer/code/git.go (about)

     1  // Copyright 2019 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package code
     5  
     6  import (
     7  	"context"
     8  	"strconv"
     9  	"strings"
    10  
    11  	repo_model "code.gitea.io/gitea/models/repo"
    12  	"code.gitea.io/gitea/modules/git"
    13  	"code.gitea.io/gitea/modules/indexer/code/internal"
    14  	"code.gitea.io/gitea/modules/log"
    15  	"code.gitea.io/gitea/modules/setting"
    16  )
    17  
    18  func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) {
    19  	stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
    20  	if err != nil {
    21  		return "", err
    22  	}
    23  	return strings.TrimSpace(stdout), nil
    24  }
    25  
    26  // getRepoChanges returns changes to repo since last indexer update
    27  func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
    28  	status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode)
    29  	if err != nil {
    30  		return nil, err
    31  	}
    32  
    33  	needGenesis := len(status.CommitSha) == 0
    34  	if !needGenesis {
    35  		hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision)
    36  		stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
    37  		needGenesis = len(stdout) == 0
    38  	}
    39  
    40  	if needGenesis {
    41  		return genesisChanges(ctx, repo, revision)
    42  	}
    43  	return nonGenesisChanges(ctx, repo, revision)
    44  }
    45  
    46  func isIndexable(entry *git.TreeEntry) bool {
    47  	if !entry.IsRegular() && !entry.IsExecutable() {
    48  		return false
    49  	}
    50  	name := strings.ToLower(entry.Name())
    51  	for _, g := range setting.Indexer.ExcludePatterns {
    52  		if g.Match(name) {
    53  			return false
    54  		}
    55  	}
    56  	for _, g := range setting.Indexer.IncludePatterns {
    57  		if g.Match(name) {
    58  			return true
    59  		}
    60  	}
    61  	return len(setting.Indexer.IncludePatterns) == 0
    62  }
    63  
    64  // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
    65  func parseGitLsTreeOutput(objectFormat git.ObjectFormat, stdout []byte) ([]internal.FileUpdate, error) {
    66  	entries, err := git.ParseTreeEntries(objectFormat, stdout)
    67  	if err != nil {
    68  		return nil, err
    69  	}
    70  	idxCount := 0
    71  	updates := make([]internal.FileUpdate, len(entries))
    72  	for _, entry := range entries {
    73  		if isIndexable(entry) {
    74  			updates[idxCount] = internal.FileUpdate{
    75  				Filename: entry.Name(),
    76  				BlobSha:  entry.ID.String(),
    77  				Size:     entry.Size(),
    78  				Sized:    true,
    79  			}
    80  			idxCount++
    81  		}
    82  	}
    83  	return updates[:idxCount], nil
    84  }
    85  
    86  // genesisChanges get changes to add repo to the indexer for the first time
    87  func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
    88  	var changes internal.RepoChanges
    89  	stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()})
    90  	if runErr != nil {
    91  		return nil, runErr
    92  	}
    93  
    94  	objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
    95  
    96  	var err error
    97  	changes.Updates, err = parseGitLsTreeOutput(objectFormat, stdout)
    98  	return &changes, err
    99  }
   100  
   101  // nonGenesisChanges get changes since the previous indexer update
   102  func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
   103  	diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
   104  	stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
   105  	if runErr != nil {
   106  		// previous commit sha may have been removed by a force push, so
   107  		// try rebuilding from scratch
   108  		log.Warn("git diff: %v", runErr)
   109  		if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil {
   110  			return nil, err
   111  		}
   112  		return genesisChanges(ctx, repo, revision)
   113  	}
   114  
   115  	var changes internal.RepoChanges
   116  	var err error
   117  	updatedFilenames := make([]string, 0, 10)
   118  	objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
   119  
   120  	updateChanges := func() error {
   121  		cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision).
   122  			AddDashesAndList(updatedFilenames...)
   123  		lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()})
   124  		if err != nil {
   125  			return err
   126  		}
   127  
   128  		updates, err1 := parseGitLsTreeOutput(objectFormat, lsTreeStdout)
   129  		if err1 != nil {
   130  			return err1
   131  		}
   132  		changes.Updates = append(changes.Updates, updates...)
   133  		return nil
   134  	}
   135  	lines := strings.Split(stdout, "\n")
   136  	for _, line := range lines {
   137  		line = strings.TrimSpace(line)
   138  		if len(line) == 0 {
   139  			continue
   140  		}
   141  		fields := strings.Split(line, "\t")
   142  		if len(fields) < 2 {
   143  			log.Warn("Unparseable output for diff --name-status: `%s`)", line)
   144  			continue
   145  		}
   146  		filename := fields[1]
   147  		if len(filename) == 0 {
   148  			continue
   149  		} else if filename[0] == '"' {
   150  			filename, err = strconv.Unquote(filename)
   151  			if err != nil {
   152  				return nil, err
   153  			}
   154  		}
   155  
   156  		switch status := fields[0][0]; status {
   157  		case 'M', 'A':
   158  			updatedFilenames = append(updatedFilenames, filename)
   159  		case 'D':
   160  			changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
   161  		case 'R', 'C':
   162  			if len(fields) < 3 {
   163  				log.Warn("Unparseable output for diff --name-status: `%s`)", line)
   164  				continue
   165  			}
   166  			dest := fields[2]
   167  			if len(dest) == 0 {
   168  				log.Warn("Unparseable output for diff --name-status: `%s`)", line)
   169  				continue
   170  			}
   171  			if dest[0] == '"' {
   172  				dest, err = strconv.Unquote(dest)
   173  				if err != nil {
   174  					return nil, err
   175  				}
   176  			}
   177  			if status == 'R' {
   178  				changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
   179  			}
   180  			updatedFilenames = append(updatedFilenames, dest)
   181  		default:
   182  			log.Warn("Unrecognized status: %c (line=%s)", status, line)
   183  		}
   184  
   185  		// According to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information
   186  		// the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30
   187  		if len(updatedFilenames) >= 30 {
   188  			if err := updateChanges(); err != nil {
   189  				return nil, err
   190  			}
   191  			updatedFilenames = updatedFilenames[0:0]
   192  		}
   193  	}
   194  
   195  	if len(updatedFilenames) > 0 {
   196  		if err := updateChanges(); err != nil {
   197  			return nil, err
   198  		}
   199  	}
   200  
   201  	return &changes, err
   202  }