github.com/cobalt77/jfrog-client-go@v0.14.5/artifactory/services/gitlfsclean.go (about) 1 package services 2 3 import ( 4 "fmt" 5 "net/url" 6 "os" 7 "path" 8 "regexp" 9 "strings" 10 11 rthttpclient "github.com/cobalt77/jfrog-client-go/artifactory/httpclient" 12 "github.com/cobalt77/jfrog-client-go/artifactory/services/utils" 13 "github.com/cobalt77/jfrog-client-go/auth" 14 "github.com/cobalt77/jfrog-client-go/utils/errorutils" 15 "github.com/cobalt77/jfrog-client-go/utils/io/content" 16 "github.com/cobalt77/jfrog-client-go/utils/log" 17 "gopkg.in/src-d/go-git.v4" 18 "gopkg.in/src-d/go-git.v4/plumbing" 19 gitconfig "gopkg.in/src-d/go-git.v4/plumbing/format/config" 20 "gopkg.in/src-d/go-git.v4/plumbing/object" 21 ) 22 23 type GitLfsCleanService struct { 24 client *rthttpclient.ArtifactoryHttpClient 25 ArtDetails auth.ServiceDetails 26 DryRun bool 27 } 28 29 func NewGitLfsCleanService(client *rthttpclient.ArtifactoryHttpClient) *GitLfsCleanService { 30 return &GitLfsCleanService{client: client} 31 } 32 33 func (glc *GitLfsCleanService) GetArtifactoryDetails() auth.ServiceDetails { 34 return glc.ArtDetails 35 } 36 37 func (glc *GitLfsCleanService) SetArtifactoryDetails(art auth.ServiceDetails) { 38 glc.ArtDetails = art 39 } 40 41 func (glc *GitLfsCleanService) IsDryRun() bool { 42 return glc.DryRun 43 } 44 45 func (glc *GitLfsCleanService) GetJfrogHttpClient() (*rthttpclient.ArtifactoryHttpClient, error) { 46 return glc.client, nil 47 } 48 49 func (glc *GitLfsCleanService) GetUnreferencedGitLfsFiles(gitLfsCleanParams GitLfsCleanParams) (*content.ContentReader, error) { 50 var err error 51 repo := gitLfsCleanParams.GetRepo() 52 gitPath := gitLfsCleanParams.GetGitPath() 53 if gitPath == "" { 54 gitPath, err = os.Getwd() 55 if err != nil { 56 return nil, errorutils.CheckError(err) 57 } 58 } 59 if len(repo) <= 0 { 60 repo, err = detectRepo(gitPath, glc.ArtDetails.GetUrl()) 61 if err != nil { 62 return nil, err 63 } 64 } 65 log.Info("Searching files from Artifactory repository", repo, "...") 66 refsRegex := getRefsRegex(gitLfsCleanParams.GetRef()) 67 artifactoryLfsFilesReader, err := glc.searchLfsFilesInArtifactory(repo) 68 if err != nil { 69 return nil, errorutils.CheckError(err) 70 } 71 defer artifactoryLfsFilesReader.Close() 72 log.Info("Collecting files to preserve from Git references matching the pattern", gitLfsCleanParams.GetRef(), "...") 73 gitLfsFiles, err := getLfsFilesFromGit(gitPath, refsRegex) 74 if err != nil { 75 return nil, errorutils.CheckError(err) 76 } 77 filesToDeleteReader, err := findFilesToDelete(artifactoryLfsFilesReader, gitLfsFiles) 78 if err != nil { 79 return nil, err 80 } 81 length, err := filesToDeleteReader.Length() 82 if err != nil { 83 return nil, err 84 } 85 log.Info("Found", len(gitLfsFiles), "files to keep, and", length, "to clean") 86 return filesToDeleteReader, nil 87 } 88 89 func findFilesToDelete(artifactoryLfsFilesReader *content.ContentReader, gitLfsFiles map[string]struct{}) (*content.ContentReader, error) { 90 cw, err := content.NewContentWriter("results", true, false) 91 if err != nil { 92 return nil, err 93 } 94 defer cw.Close() 95 for resultItem := new(utils.ResultItem); artifactoryLfsFilesReader.NextRecord(resultItem) == nil; resultItem = new(utils.ResultItem) { 96 if _, keepFile := gitLfsFiles[resultItem.Name]; !keepFile { 97 cw.Write(*resultItem) 98 } 99 } 100 artifactoryLfsFilesReader.Reset() 101 return content.NewContentReader(cw.GetFilePath(), cw.GetArrayKey()), nil 102 } 103 104 func lfsConfigUrlExtractor(conf *gitconfig.Config) (*url.URL, error) { 105 return url.Parse(conf.Section("lfs").Option("url")) 106 } 107 108 func configLfsUrlExtractor(conf *gitconfig.Config) (*url.URL, error) { 109 return url.Parse(conf.Section("remote").Subsection("origin").Option("lfsurl")) 110 } 111 112 func detectRepo(gitPath, rtUrl string) (string, error) { 113 repo, err := extractRepo(gitPath, ".lfsconfig", rtUrl, lfsConfigUrlExtractor) 114 if err == nil { 115 return repo, nil 116 } 117 errMsg1 := fmt.Sprintf("Cannot detect Git LFS repository from .lfsconfig: %s", err.Error()) 118 repo, err = extractRepo(gitPath, ".git/config", rtUrl, configLfsUrlExtractor) 119 if err == nil { 120 return repo, nil 121 } 122 errMsg2 := fmt.Sprintf("Cannot detect Git LFS repository from .git/config: %s", err.Error()) 123 suggestedSolution := "You may want to try passing the --repo option manually" 124 return "", errorutils.CheckError(fmt.Errorf("%s%s%s", errMsg1, errMsg2, suggestedSolution)) 125 } 126 127 func extractRepo(gitPath, configFile, rtUrl string, lfsUrlExtractor lfsUrlExtractorFunc) (string, error) { 128 lfsUrl, err := getLfsUrl(gitPath, configFile, lfsUrlExtractor) 129 if err != nil { 130 return "", err 131 } 132 artifactoryConfiguredUrl, err := url.Parse(rtUrl) 133 if err != nil { 134 return "", err 135 } 136 if artifactoryConfiguredUrl.Scheme != lfsUrl.Scheme || artifactoryConfiguredUrl.Host != lfsUrl.Host { 137 return "", fmt.Errorf("Configured Git LFS URL %q does not match provided URL %q", lfsUrl.String(), artifactoryConfiguredUrl.String()) 138 } 139 artifactoryConfiguredUrlPath := path.Clean("/"+artifactoryConfiguredUrl.Path+"/api/lfs") + "/" 140 lfsUrlPath := path.Clean(lfsUrl.Path) 141 if strings.HasPrefix(lfsUrlPath, artifactoryConfiguredUrlPath) { 142 return lfsUrlPath[len(artifactoryConfiguredUrlPath):], nil 143 } 144 return "", fmt.Errorf("Configured Git LFS URL %q does not match provided URL %q", lfsUrl.String(), artifactoryConfiguredUrl.String()) 145 } 146 147 type lfsUrlExtractorFunc func(conf *gitconfig.Config) (*url.URL, error) 148 149 func getLfsUrl(gitPath, configFile string, lfsUrlExtractor lfsUrlExtractorFunc) (*url.URL, error) { 150 var lfsUrl *url.URL 151 lfsConf, err := os.Open(path.Join(gitPath, configFile)) 152 if err != nil { 153 return nil, errorutils.CheckError(err) 154 } 155 defer lfsConf.Close() 156 conf := gitconfig.New() 157 err = gitconfig.NewDecoder(lfsConf).Decode(conf) 158 if err != nil { 159 return nil, errorutils.CheckError(err) 160 } 161 lfsUrl, err = lfsUrlExtractor(conf) 162 return lfsUrl, errorutils.CheckError(err) 163 } 164 165 func getRefsRegex(refs string) string { 166 replacer := strings.NewReplacer(",", "|", "\\*", ".*") 167 return replacer.Replace(regexp.QuoteMeta(refs)) 168 } 169 170 func (glc *GitLfsCleanService) searchLfsFilesInArtifactory(repo string) (*content.ContentReader, error) { 171 spec := &utils.ArtifactoryCommonParams{Pattern: repo, Target: "", Props: "", ExcludeProps: "", Build: "", Recursive: true, Regexp: false, IncludeDirs: false} 172 return utils.SearchBySpecWithPattern(spec, glc, utils.NONE) 173 } 174 175 func getLfsFilesFromGit(path, refMatch string) (map[string]struct{}, error) { 176 // a hash set of sha2 sums, to make lookup faster later 177 results := make(map[string]struct{}, 0) 178 repo, err := git.PlainOpen(path) 179 if err != nil { 180 return nil, errorutils.CheckError(err) 181 } 182 log.Debug("Opened Git repo at", path, "for reading") 183 refs, err := repo.References() 184 if err != nil { 185 return nil, errorutils.CheckError(err) 186 } 187 // look for every Git LFS pointer file that exists in any ref (branch, 188 // remote branch, tag, etc.) who's name matches the regex refMatch 189 err = refs.ForEach(func(ref *plumbing.Reference) error { 190 // go-git recognizes three types of refs: regular hash refs, 191 // symbolic refs (e.g. HEAD), and invalid refs. We only care 192 // about the first type here. 193 if ref.Type() != plumbing.HashReference { 194 return nil 195 } 196 log.Debug("Checking ref", ref.Name().String()) 197 match, err := regexp.MatchString(refMatch, ref.Name().String()) 198 if err != nil || !match { 199 return errorutils.CheckError(err) 200 } 201 commit, err := repo.CommitObject(ref.Hash()) 202 if err != nil { 203 return errorutils.CheckError(err) 204 } 205 files, err := commit.Files() 206 if err != nil { 207 return errorutils.CheckError(err) 208 } 209 err = files.ForEach(func(file *object.File) error { 210 return collectLfsFileFromGit(results, file) 211 }) 212 return errorutils.CheckError(err) 213 }) 214 return results, errorutils.CheckError(err) 215 } 216 217 func collectLfsFileFromGit(results map[string]struct{}, file *object.File) error { 218 // A Git LFS pointer is a small file containing a sha2. Any file bigger 219 // than a kilobyte is extremely unlikely to be such a pointer. 220 if file.Size > 1024 { 221 return nil 222 } 223 lines, err := file.Lines() 224 if err != nil { 225 return errorutils.CheckError(err) 226 } 227 // the line containing the sha2 we're looking for will match this regex 228 regex := "^oid sha256:[[:alnum:]]{64}$" 229 for _, line := range lines { 230 if !strings.HasPrefix(line, "oid ") { 231 continue 232 } 233 match, err := regexp.MatchString(regex, line) 234 if err != nil || !match { 235 return errorutils.CheckError(err) 236 } 237 result := line[strings.Index(line, ":")+1:] 238 log.Debug("Found file", result) 239 results[result] = struct{}{} 240 break 241 } 242 return nil 243 } 244 245 type GitLfsCleanParams struct { 246 Refs string 247 Repo string 248 GitPath string 249 } 250 251 func (glc *GitLfsCleanParams) GetRef() string { 252 return glc.Refs 253 } 254 255 func (glc *GitLfsCleanParams) GetRepo() string { 256 return glc.Repo 257 } 258 259 func (glc *GitLfsCleanParams) GetGitPath() string { 260 return glc.GitPath 261 } 262 263 func NewGitLfsCleanParams() GitLfsCleanParams { 264 return GitLfsCleanParams{} 265 }