code.gitea.io/gitea@v1.22.3/modules/indexer/code/git.go (about) 1 // Copyright 2019 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package code 5 6 import ( 7 "context" 8 "strconv" 9 "strings" 10 11 repo_model "code.gitea.io/gitea/models/repo" 12 "code.gitea.io/gitea/modules/git" 13 "code.gitea.io/gitea/modules/indexer/code/internal" 14 "code.gitea.io/gitea/modules/log" 15 "code.gitea.io/gitea/modules/setting" 16 ) 17 18 func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) { 19 stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) 20 if err != nil { 21 return "", err 22 } 23 return strings.TrimSpace(stdout), nil 24 } 25 26 // getRepoChanges returns changes to repo since last indexer update 27 func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { 28 status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode) 29 if err != nil { 30 return nil, err 31 } 32 33 needGenesis := len(status.CommitSha) == 0 34 if !needGenesis { 35 hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision) 36 stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) 37 needGenesis = len(stdout) == 0 38 } 39 40 if needGenesis { 41 return genesisChanges(ctx, repo, revision) 42 } 43 return nonGenesisChanges(ctx, repo, revision) 44 } 45 46 func isIndexable(entry *git.TreeEntry) bool { 47 if !entry.IsRegular() && !entry.IsExecutable() { 48 return false 49 } 50 name := strings.ToLower(entry.Name()) 51 for _, g := range setting.Indexer.ExcludePatterns { 52 if g.Match(name) { 53 return false 54 } 55 } 56 for _, g := range setting.Indexer.IncludePatterns { 57 if g.Match(name) { 58 return true 59 } 60 } 61 return len(setting.Indexer.IncludePatterns) == 0 62 } 63 64 // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command 65 func parseGitLsTreeOutput(objectFormat git.ObjectFormat, stdout []byte) ([]internal.FileUpdate, error) { 66 entries, err := git.ParseTreeEntries(objectFormat, stdout) 67 if err != nil { 68 return nil, err 69 } 70 idxCount := 0 71 updates := make([]internal.FileUpdate, len(entries)) 72 for _, entry := range entries { 73 if isIndexable(entry) { 74 updates[idxCount] = internal.FileUpdate{ 75 Filename: entry.Name(), 76 BlobSha: entry.ID.String(), 77 Size: entry.Size(), 78 Sized: true, 79 } 80 idxCount++ 81 } 82 } 83 return updates[:idxCount], nil 84 } 85 86 // genesisChanges get changes to add repo to the indexer for the first time 87 func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { 88 var changes internal.RepoChanges 89 stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) 90 if runErr != nil { 91 return nil, runErr 92 } 93 94 objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName) 95 96 var err error 97 changes.Updates, err = parseGitLsTreeOutput(objectFormat, stdout) 98 return &changes, err 99 } 100 101 // nonGenesisChanges get changes since the previous indexer update 102 func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { 103 diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) 104 stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) 105 if runErr != nil { 106 // previous commit sha may have been removed by a force push, so 107 // try rebuilding from scratch 108 log.Warn("git diff: %v", runErr) 109 if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil { 110 return nil, err 111 } 112 return genesisChanges(ctx, repo, revision) 113 } 114 115 var changes internal.RepoChanges 116 var err error 117 updatedFilenames := make([]string, 0, 10) 118 objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName) 119 120 updateChanges := func() error { 121 cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). 122 AddDashesAndList(updatedFilenames...) 123 lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) 124 if err != nil { 125 return err 126 } 127 128 updates, err1 := parseGitLsTreeOutput(objectFormat, lsTreeStdout) 129 if err1 != nil { 130 return err1 131 } 132 changes.Updates = append(changes.Updates, updates...) 133 return nil 134 } 135 lines := strings.Split(stdout, "\n") 136 for _, line := range lines { 137 line = strings.TrimSpace(line) 138 if len(line) == 0 { 139 continue 140 } 141 fields := strings.Split(line, "\t") 142 if len(fields) < 2 { 143 log.Warn("Unparseable output for diff --name-status: `%s`)", line) 144 continue 145 } 146 filename := fields[1] 147 if len(filename) == 0 { 148 continue 149 } else if filename[0] == '"' { 150 filename, err = strconv.Unquote(filename) 151 if err != nil { 152 return nil, err 153 } 154 } 155 156 switch status := fields[0][0]; status { 157 case 'M', 'A': 158 updatedFilenames = append(updatedFilenames, filename) 159 case 'D': 160 changes.RemovedFilenames = append(changes.RemovedFilenames, filename) 161 case 'R', 'C': 162 if len(fields) < 3 { 163 log.Warn("Unparseable output for diff --name-status: `%s`)", line) 164 continue 165 } 166 dest := fields[2] 167 if len(dest) == 0 { 168 log.Warn("Unparseable output for diff --name-status: `%s`)", line) 169 continue 170 } 171 if dest[0] == '"' { 172 dest, err = strconv.Unquote(dest) 173 if err != nil { 174 return nil, err 175 } 176 } 177 if status == 'R' { 178 changes.RemovedFilenames = append(changes.RemovedFilenames, filename) 179 } 180 updatedFilenames = append(updatedFilenames, dest) 181 default: 182 log.Warn("Unrecognized status: %c (line=%s)", status, line) 183 } 184 185 // According to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information 186 // the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30 187 if len(updatedFilenames) >= 30 { 188 if err := updateChanges(); err != nil { 189 return nil, err 190 } 191 updatedFilenames = updatedFilenames[0:0] 192 } 193 } 194 195 if len(updatedFilenames) > 0 { 196 if err := updateChanges(); err != nil { 197 return nil, err 198 } 199 } 200 201 return &changes, err 202 }