code.gitea.io/gitea@v1.19.3/modules/indexer/code/git.go (about)

     1  // Copyright 2019 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package code
     5  
     6  import (
     7  	"context"
     8  	"strconv"
     9  	"strings"
    10  
    11  	repo_model "code.gitea.io/gitea/models/repo"
    12  	"code.gitea.io/gitea/modules/git"
    13  	"code.gitea.io/gitea/modules/log"
    14  	"code.gitea.io/gitea/modules/setting"
    15  )
    16  
// fileUpdate describes one file that has to be added to, or refreshed in, the
// code index.
type fileUpdate struct {
	Filename string // name of the tree entry the update refers to
	BlobSha  string // string form of the tree entry's object ID
	Size     int64  // size reported by the tree entry
	Sized    bool   // presumably marks Size as populated; parseGitLsTreeOutput always sets it to true
}
    23  
// repoChanges holds the changes (file additions/updates/removals) to a repo
// that the code indexer has to apply.
type repoChanges struct {
	Updates          []fileUpdate // files that were added or updated
	RemovedFilenames []string     // paths of removed files (including the old path of a rename)
}
    29  
    30  func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) {
    31  	stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
    32  	if err != nil {
    33  		return "", err
    34  	}
    35  	return strings.TrimSpace(stdout), nil
    36  }
    37  
    38  // getRepoChanges returns changes to repo since last indexer update
    39  func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) {
    40  	status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode)
    41  	if err != nil {
    42  		return nil, err
    43  	}
    44  
    45  	if len(status.CommitSha) == 0 {
    46  		return genesisChanges(ctx, repo, revision)
    47  	}
    48  	return nonGenesisChanges(ctx, repo, revision)
    49  }
    50  
    51  func isIndexable(entry *git.TreeEntry) bool {
    52  	if !entry.IsRegular() && !entry.IsExecutable() {
    53  		return false
    54  	}
    55  	name := strings.ToLower(entry.Name())
    56  	for _, g := range setting.Indexer.ExcludePatterns {
    57  		if g.Match(name) {
    58  			return false
    59  		}
    60  	}
    61  	for _, g := range setting.Indexer.IncludePatterns {
    62  		if g.Match(name) {
    63  			return true
    64  		}
    65  	}
    66  	return len(setting.Indexer.IncludePatterns) == 0
    67  }
    68  
    69  // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
    70  func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
    71  	entries, err := git.ParseTreeEntries(stdout)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	idxCount := 0
    76  	updates := make([]fileUpdate, len(entries))
    77  	for _, entry := range entries {
    78  		if isIndexable(entry) {
    79  			updates[idxCount] = fileUpdate{
    80  				Filename: entry.Name(),
    81  				BlobSha:  entry.ID.String(),
    82  				Size:     entry.Size(),
    83  				Sized:    true,
    84  			}
    85  			idxCount++
    86  		}
    87  	}
    88  	return updates[:idxCount], nil
    89  }
    90  
    91  // genesisChanges get changes to add repo to the indexer for the first time
    92  func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) {
    93  	var changes repoChanges
    94  	stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()})
    95  	if runErr != nil {
    96  		return nil, runErr
    97  	}
    98  
    99  	var err error
   100  	changes.Updates, err = parseGitLsTreeOutput(stdout)
   101  	return &changes, err
   102  }
   103  
   104  // nonGenesisChanges get changes since the previous indexer update
   105  func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) {
   106  	diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
   107  	stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
   108  	if runErr != nil {
   109  		// previous commit sha may have been removed by a force push, so
   110  		// try rebuilding from scratch
   111  		log.Warn("git diff: %v", runErr)
   112  		if err := indexer.Delete(repo.ID); err != nil {
   113  			return nil, err
   114  		}
   115  		return genesisChanges(ctx, repo, revision)
   116  	}
   117  
   118  	var changes repoChanges
   119  	var err error
   120  	updatedFilenames := make([]string, 0, 10)
   121  	for _, line := range strings.Split(stdout, "\n") {
   122  		line = strings.TrimSpace(line)
   123  		if len(line) == 0 {
   124  			continue
   125  		}
   126  		fields := strings.Split(line, "\t")
   127  		if len(fields) < 2 {
   128  			log.Warn("Unparseable output for diff --name-status: `%s`)", line)
   129  			continue
   130  		}
   131  		filename := fields[1]
   132  		if len(filename) == 0 {
   133  			continue
   134  		} else if filename[0] == '"' {
   135  			filename, err = strconv.Unquote(filename)
   136  			if err != nil {
   137  				return nil, err
   138  			}
   139  		}
   140  
   141  		switch status := fields[0][0]; status {
   142  		case 'M', 'A':
   143  			updatedFilenames = append(updatedFilenames, filename)
   144  		case 'D':
   145  			changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
   146  		case 'R', 'C':
   147  			if len(fields) < 3 {
   148  				log.Warn("Unparseable output for diff --name-status: `%s`)", line)
   149  				continue
   150  			}
   151  			dest := fields[2]
   152  			if len(dest) == 0 {
   153  				log.Warn("Unparseable output for diff --name-status: `%s`)", line)
   154  				continue
   155  			}
   156  			if dest[0] == '"' {
   157  				dest, err = strconv.Unquote(dest)
   158  				if err != nil {
   159  					return nil, err
   160  				}
   161  			}
   162  			if status == 'R' {
   163  				changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
   164  			}
   165  			updatedFilenames = append(updatedFilenames, dest)
   166  		default:
   167  			log.Warn("Unrecognized status: %c (line=%s)", status, line)
   168  		}
   169  	}
   170  
   171  	cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision).
   172  		AddDashesAndList(updatedFilenames...)
   173  	lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()})
   174  	if err != nil {
   175  		return nil, err
   176  	}
   177  	changes.Updates, err = parseGitLsTreeOutput(lsTreeStdout)
   178  	return &changes, err
   179  }