code.gitea.io/gitea@v1.19.3/modules/indexer/code/git.go (about) 1 // Copyright 2019 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package code 5 6 import ( 7 "context" 8 "strconv" 9 "strings" 10 11 repo_model "code.gitea.io/gitea/models/repo" 12 "code.gitea.io/gitea/modules/git" 13 "code.gitea.io/gitea/modules/log" 14 "code.gitea.io/gitea/modules/setting" 15 ) 16 17 type fileUpdate struct { 18 Filename string 19 BlobSha string 20 Size int64 21 Sized bool 22 } 23 24 // repoChanges changes (file additions/updates/removals) to a repo 25 type repoChanges struct { 26 Updates []fileUpdate 27 RemovedFilenames []string 28 } 29 30 func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) { 31 stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) 32 if err != nil { 33 return "", err 34 } 35 return strings.TrimSpace(stdout), nil 36 } 37 38 // getRepoChanges returns changes to repo since last indexer update 39 func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) { 40 status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode) 41 if err != nil { 42 return nil, err 43 } 44 45 if len(status.CommitSha) == 0 { 46 return genesisChanges(ctx, repo, revision) 47 } 48 return nonGenesisChanges(ctx, repo, revision) 49 } 50 51 func isIndexable(entry *git.TreeEntry) bool { 52 if !entry.IsRegular() && !entry.IsExecutable() { 53 return false 54 } 55 name := strings.ToLower(entry.Name()) 56 for _, g := range setting.Indexer.ExcludePatterns { 57 if g.Match(name) { 58 return false 59 } 60 } 61 for _, g := range setting.Indexer.IncludePatterns { 62 if g.Match(name) { 63 return true 64 } 65 } 66 return len(setting.Indexer.IncludePatterns) == 0 67 } 68 69 // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command 70 func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { 71 entries, err := git.ParseTreeEntries(stdout) 72 if err != nil { 73 return nil, err 74 } 75 idxCount := 0 76 updates := make([]fileUpdate, len(entries)) 77 for _, entry := range entries { 78 if isIndexable(entry) { 79 updates[idxCount] = fileUpdate{ 80 Filename: entry.Name(), 81 BlobSha: entry.ID.String(), 82 Size: entry.Size(), 83 Sized: true, 84 } 85 idxCount++ 86 } 87 } 88 return updates[:idxCount], nil 89 } 90 91 // genesisChanges get changes to add repo to the indexer for the first time 92 func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) { 93 var changes repoChanges 94 stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) 95 if runErr != nil { 96 return nil, runErr 97 } 98 99 var err error 100 changes.Updates, err = parseGitLsTreeOutput(stdout) 101 return &changes, err 102 } 103 104 // nonGenesisChanges get changes since the previous indexer update 105 func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) { 106 diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) 107 stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) 108 if runErr != nil { 109 // previous commit sha may have been removed by a force push, so 110 // try rebuilding from scratch 111 log.Warn("git diff: %v", runErr) 112 if err := indexer.Delete(repo.ID); err != nil { 113 return nil, err 114 } 115 return genesisChanges(ctx, repo, revision) 116 } 117 118 var changes repoChanges 119 var err error 120 updatedFilenames := make([]string, 0, 10) 121 for _, line := range strings.Split(stdout, "\n") { 122 line = strings.TrimSpace(line) 123 if len(line) == 0 { 124 continue 125 } 126 fields := strings.Split(line, "\t") 127 if len(fields) < 2 { 128 log.Warn("Unparseable output for diff --name-status: `%s`)", line) 129 continue 130 } 131 filename := fields[1] 132 if len(filename) == 0 { 133 continue 134 } else if filename[0] == '"' { 135 filename, err = strconv.Unquote(filename) 136 if err != nil { 137 return nil, err 138 } 139 } 140 141 switch status := fields[0][0]; status { 142 case 'M', 'A': 143 updatedFilenames = append(updatedFilenames, filename) 144 case 'D': 145 changes.RemovedFilenames = append(changes.RemovedFilenames, filename) 146 case 'R', 'C': 147 if len(fields) < 3 { 148 log.Warn("Unparseable output for diff --name-status: `%s`)", line) 149 continue 150 } 151 dest := fields[2] 152 if len(dest) == 0 { 153 log.Warn("Unparseable output for diff --name-status: `%s`)", line) 154 continue 155 } 156 if dest[0] == '"' { 157 dest, err = strconv.Unquote(dest) 158 if err != nil { 159 return nil, err 160 } 161 } 162 if status == 'R' { 163 changes.RemovedFilenames = append(changes.RemovedFilenames, filename) 164 } 165 updatedFilenames = append(updatedFilenames, dest) 166 default: 167 log.Warn("Unrecognized status: %c (line=%s)", status, line) 168 } 169 } 170 171 cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). 172 AddDashesAndList(updatedFilenames...) 173 lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) 174 if err != nil { 175 return nil, err 176 } 177 changes.Updates, err = parseGitLsTreeOutput(lsTreeStdout) 178 return &changes, err 179 }