github.com/cobalt77/jfrog-client-go@v0.14.5/artifactory/services/gitlfsclean.go (about)

     1  package services
     2  
     3  import (
     4  	"fmt"
     5  	"net/url"
     6  	"os"
     7  	"path"
     8  	"regexp"
     9  	"strings"
    10  
    11  	rthttpclient "github.com/cobalt77/jfrog-client-go/artifactory/httpclient"
    12  	"github.com/cobalt77/jfrog-client-go/artifactory/services/utils"
    13  	"github.com/cobalt77/jfrog-client-go/auth"
    14  	"github.com/cobalt77/jfrog-client-go/utils/errorutils"
    15  	"github.com/cobalt77/jfrog-client-go/utils/io/content"
    16  	"github.com/cobalt77/jfrog-client-go/utils/log"
    17  	"gopkg.in/src-d/go-git.v4"
    18  	"gopkg.in/src-d/go-git.v4/plumbing"
    19  	gitconfig "gopkg.in/src-d/go-git.v4/plumbing/format/config"
    20  	"gopkg.in/src-d/go-git.v4/plumbing/object"
    21  )
    22  
// GitLfsCleanService finds the files of an Artifactory Git LFS repository that
// are not referenced by the Git LFS pointers of a local Git repository.
type GitLfsCleanService struct {
	// client is the HTTP client handed back by GetJfrogHttpClient.
	client     *rthttpclient.ArtifactoryHttpClient
	// ArtDetails holds the Artifactory service details; its URL is used when
	// the target repository has to be detected from the Git configuration.
	ArtDetails auth.ServiceDetails
	// DryRun is the flag reported by IsDryRun.
	DryRun     bool
}
    28  
    29  func NewGitLfsCleanService(client *rthttpclient.ArtifactoryHttpClient) *GitLfsCleanService {
    30  	return &GitLfsCleanService{client: client}
    31  }
    32  
// GetArtifactoryDetails returns the Artifactory service details currently held
// in the ArtDetails field.
func (glc *GitLfsCleanService) GetArtifactoryDetails() auth.ServiceDetails {
	return glc.ArtDetails
}
    36  
// SetArtifactoryDetails stores art in the ArtDetails field, replacing any
// previously set service details.
func (glc *GitLfsCleanService) SetArtifactoryDetails(art auth.ServiceDetails) {
	glc.ArtDetails = art
}
    40  
// IsDryRun reports the value of the DryRun field.
func (glc *GitLfsCleanService) IsDryRun() bool {
	return glc.DryRun
}
    44  
// GetJfrogHttpClient returns the HTTP client the service was constructed with.
// The returned error is always nil in this implementation.
func (glc *GitLfsCleanService) GetJfrogHttpClient() (*rthttpclient.ArtifactoryHttpClient, error) {
	return glc.client, nil
}
    48  
    49  func (glc *GitLfsCleanService) GetUnreferencedGitLfsFiles(gitLfsCleanParams GitLfsCleanParams) (*content.ContentReader, error) {
    50  	var err error
    51  	repo := gitLfsCleanParams.GetRepo()
    52  	gitPath := gitLfsCleanParams.GetGitPath()
    53  	if gitPath == "" {
    54  		gitPath, err = os.Getwd()
    55  		if err != nil {
    56  			return nil, errorutils.CheckError(err)
    57  		}
    58  	}
    59  	if len(repo) <= 0 {
    60  		repo, err = detectRepo(gitPath, glc.ArtDetails.GetUrl())
    61  		if err != nil {
    62  			return nil, err
    63  		}
    64  	}
    65  	log.Info("Searching files from Artifactory repository", repo, "...")
    66  	refsRegex := getRefsRegex(gitLfsCleanParams.GetRef())
    67  	artifactoryLfsFilesReader, err := glc.searchLfsFilesInArtifactory(repo)
    68  	if err != nil {
    69  		return nil, errorutils.CheckError(err)
    70  	}
    71  	defer artifactoryLfsFilesReader.Close()
    72  	log.Info("Collecting files to preserve from Git references matching the pattern", gitLfsCleanParams.GetRef(), "...")
    73  	gitLfsFiles, err := getLfsFilesFromGit(gitPath, refsRegex)
    74  	if err != nil {
    75  		return nil, errorutils.CheckError(err)
    76  	}
    77  	filesToDeleteReader, err := findFilesToDelete(artifactoryLfsFilesReader, gitLfsFiles)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  	length, err := filesToDeleteReader.Length()
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	log.Info("Found", len(gitLfsFiles), "files to keep, and", length, "to clean")
    86  	return filesToDeleteReader, nil
    87  }
    88  
    89  func findFilesToDelete(artifactoryLfsFilesReader *content.ContentReader, gitLfsFiles map[string]struct{}) (*content.ContentReader, error) {
    90  	cw, err := content.NewContentWriter("results", true, false)
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  	defer cw.Close()
    95  	for resultItem := new(utils.ResultItem); artifactoryLfsFilesReader.NextRecord(resultItem) == nil; resultItem = new(utils.ResultItem) {
    96  		if _, keepFile := gitLfsFiles[resultItem.Name]; !keepFile {
    97  			cw.Write(*resultItem)
    98  		}
    99  	}
   100  	artifactoryLfsFilesReader.Reset()
   101  	return content.NewContentReader(cw.GetFilePath(), cw.GetArrayKey()), nil
   102  }
   103  
   104  func lfsConfigUrlExtractor(conf *gitconfig.Config) (*url.URL, error) {
   105  	return url.Parse(conf.Section("lfs").Option("url"))
   106  }
   107  
   108  func configLfsUrlExtractor(conf *gitconfig.Config) (*url.URL, error) {
   109  	return url.Parse(conf.Section("remote").Subsection("origin").Option("lfsurl"))
   110  }
   111  
   112  func detectRepo(gitPath, rtUrl string) (string, error) {
   113  	repo, err := extractRepo(gitPath, ".lfsconfig", rtUrl, lfsConfigUrlExtractor)
   114  	if err == nil {
   115  		return repo, nil
   116  	}
   117  	errMsg1 := fmt.Sprintf("Cannot detect Git LFS repository from .lfsconfig: %s", err.Error())
   118  	repo, err = extractRepo(gitPath, ".git/config", rtUrl, configLfsUrlExtractor)
   119  	if err == nil {
   120  		return repo, nil
   121  	}
   122  	errMsg2 := fmt.Sprintf("Cannot detect Git LFS repository from .git/config: %s", err.Error())
   123  	suggestedSolution := "You may want to try passing the --repo option manually"
   124  	return "", errorutils.CheckError(fmt.Errorf("%s%s%s", errMsg1, errMsg2, suggestedSolution))
   125  }
   126  
   127  func extractRepo(gitPath, configFile, rtUrl string, lfsUrlExtractor lfsUrlExtractorFunc) (string, error) {
   128  	lfsUrl, err := getLfsUrl(gitPath, configFile, lfsUrlExtractor)
   129  	if err != nil {
   130  		return "", err
   131  	}
   132  	artifactoryConfiguredUrl, err := url.Parse(rtUrl)
   133  	if err != nil {
   134  		return "", err
   135  	}
   136  	if artifactoryConfiguredUrl.Scheme != lfsUrl.Scheme || artifactoryConfiguredUrl.Host != lfsUrl.Host {
   137  		return "", fmt.Errorf("Configured Git LFS URL %q does not match provided URL %q", lfsUrl.String(), artifactoryConfiguredUrl.String())
   138  	}
   139  	artifactoryConfiguredUrlPath := path.Clean("/"+artifactoryConfiguredUrl.Path+"/api/lfs") + "/"
   140  	lfsUrlPath := path.Clean(lfsUrl.Path)
   141  	if strings.HasPrefix(lfsUrlPath, artifactoryConfiguredUrlPath) {
   142  		return lfsUrlPath[len(artifactoryConfiguredUrlPath):], nil
   143  	}
   144  	return "", fmt.Errorf("Configured Git LFS URL %q does not match provided URL %q", lfsUrl.String(), artifactoryConfiguredUrl.String())
   145  }
   146  
// lfsUrlExtractorFunc reads the Git LFS URL out of a decoded Git config file.
type lfsUrlExtractorFunc func(conf *gitconfig.Config) (*url.URL, error)
   148  
   149  func getLfsUrl(gitPath, configFile string, lfsUrlExtractor lfsUrlExtractorFunc) (*url.URL, error) {
   150  	var lfsUrl *url.URL
   151  	lfsConf, err := os.Open(path.Join(gitPath, configFile))
   152  	if err != nil {
   153  		return nil, errorutils.CheckError(err)
   154  	}
   155  	defer lfsConf.Close()
   156  	conf := gitconfig.New()
   157  	err = gitconfig.NewDecoder(lfsConf).Decode(conf)
   158  	if err != nil {
   159  		return nil, errorutils.CheckError(err)
   160  	}
   161  	lfsUrl, err = lfsUrlExtractor(conf)
   162  	return lfsUrl, errorutils.CheckError(err)
   163  }
   164  
   165  func getRefsRegex(refs string) string {
   166  	replacer := strings.NewReplacer(",", "|", "\\*", ".*")
   167  	return replacer.Replace(regexp.QuoteMeta(refs))
   168  }
   169  
   170  func (glc *GitLfsCleanService) searchLfsFilesInArtifactory(repo string) (*content.ContentReader, error) {
   171  	spec := &utils.ArtifactoryCommonParams{Pattern: repo, Target: "", Props: "", ExcludeProps: "", Build: "", Recursive: true, Regexp: false, IncludeDirs: false}
   172  	return utils.SearchBySpecWithPattern(spec, glc, utils.NONE)
   173  }
   174  
   175  func getLfsFilesFromGit(path, refMatch string) (map[string]struct{}, error) {
   176  	// a hash set of sha2 sums, to make lookup faster later
   177  	results := make(map[string]struct{}, 0)
   178  	repo, err := git.PlainOpen(path)
   179  	if err != nil {
   180  		return nil, errorutils.CheckError(err)
   181  	}
   182  	log.Debug("Opened Git repo at", path, "for reading")
   183  	refs, err := repo.References()
   184  	if err != nil {
   185  		return nil, errorutils.CheckError(err)
   186  	}
   187  	// look for every Git LFS pointer file that exists in any ref (branch,
   188  	// remote branch, tag, etc.) who's name matches the regex refMatch
   189  	err = refs.ForEach(func(ref *plumbing.Reference) error {
   190  		// go-git recognizes three types of refs: regular hash refs,
   191  		// symbolic refs (e.g. HEAD), and invalid refs. We only care
   192  		// about the first type here.
   193  		if ref.Type() != plumbing.HashReference {
   194  			return nil
   195  		}
   196  		log.Debug("Checking ref", ref.Name().String())
   197  		match, err := regexp.MatchString(refMatch, ref.Name().String())
   198  		if err != nil || !match {
   199  			return errorutils.CheckError(err)
   200  		}
   201  		commit, err := repo.CommitObject(ref.Hash())
   202  		if err != nil {
   203  			return errorutils.CheckError(err)
   204  		}
   205  		files, err := commit.Files()
   206  		if err != nil {
   207  			return errorutils.CheckError(err)
   208  		}
   209  		err = files.ForEach(func(file *object.File) error {
   210  			return collectLfsFileFromGit(results, file)
   211  		})
   212  		return errorutils.CheckError(err)
   213  	})
   214  	return results, errorutils.CheckError(err)
   215  }
   216  
   217  func collectLfsFileFromGit(results map[string]struct{}, file *object.File) error {
   218  	// A Git LFS pointer is a small file containing a sha2. Any file bigger
   219  	// than a kilobyte is extremely unlikely to be such a pointer.
   220  	if file.Size > 1024 {
   221  		return nil
   222  	}
   223  	lines, err := file.Lines()
   224  	if err != nil {
   225  		return errorutils.CheckError(err)
   226  	}
   227  	// the line containing the sha2 we're looking for will match this regex
   228  	regex := "^oid sha256:[[:alnum:]]{64}$"
   229  	for _, line := range lines {
   230  		if !strings.HasPrefix(line, "oid ") {
   231  			continue
   232  		}
   233  		match, err := regexp.MatchString(regex, line)
   234  		if err != nil || !match {
   235  			return errorutils.CheckError(err)
   236  		}
   237  		result := line[strings.Index(line, ":")+1:]
   238  		log.Debug("Found file", result)
   239  		results[result] = struct{}{}
   240  		break
   241  	}
   242  	return nil
   243  }
   244  
// GitLfsCleanParams holds the inputs of GetUnreferencedGitLfsFiles.
type GitLfsCleanParams struct {
	// Refs is the refs pattern handed to getRefsRegex: a comma-separated list
	// in which "*" acts as a wildcard.
	Refs    string
	// Repo is the Artifactory repository to search; when empty it is detected
	// from the Git repository's configuration.
	Repo    string
	// GitPath is the path of the local Git repository; when empty the current
	// working directory is used.
	GitPath string
}
   250  
// GetRef returns the Refs field.
func (glc *GitLfsCleanParams) GetRef() string {
	return glc.Refs
}
   254  
// GetRepo returns the Repo field.
func (glc *GitLfsCleanParams) GetRepo() string {
	return glc.Repo
}
   258  
// GetGitPath returns the GitPath field.
func (glc *GitLfsCleanParams) GetGitPath() string {
	return glc.GitPath
}
   262  
   263  func NewGitLfsCleanParams() GitLfsCleanParams {
   264  	return GitLfsCleanParams{}
   265  }