github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/model/app/fetcher_git.go (about)

     1  package app
     2  
     3  import (
     4  	"archive/tar"
     5  	"bytes"
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"net/http"
    11  	"net/url"
    12  	"os"
    13  	"os/exec"
    14  	"path/filepath"
    15  	"regexp"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/cozy/cozy-stack/pkg/appfs"
    20  	"github.com/cozy/cozy-stack/pkg/logger"
    21  	"github.com/spf13/afero"
    22  )
    23  
// cloneTimeout is the deadline given to the context under which the git
// commands of one fetch are executed (see the context.WithTimeout calls in
// this file). The deadline is shared by all the commands run for that fetch.
var cloneTimeout = 30 * time.Second
    25  
const (
	// ghRawManifestURL is the format of the URL of a raw file on GitHub. Its
	// verbs are filled, in order, with the user, the project, the branch and
	// the file name.
	ghRawManifestURL = "https://raw.githubusercontent.com/%s/%s/%s/%s"
	// glRawManifestURL is the format of the URL of a raw file on a GitLab
	// instance. Its verbs are filled, in order, with the host, the user (or
	// group path), the project, the branch and the file name.
	glRawManifestURL = "https://%s/%s/%s/raw/%s/%s"
)
    30  
    31  var (
    32  	// ghURLRegex is used to identify github
    33  	ghURLRegex = regexp.MustCompile(`/([^/]+)/([^/]+).git`)
    34  	// glURLRegex is used to identify gitlab
    35  	glURLRegex = regexp.MustCompile(`/(.+)/([^/]+).git`)
    36  )
    37  
// gitFetcher fetches an application manifest and its files from a git
// repository.
type gitFetcher struct {
	// manFilename is the name of the manifest file looked up in the
	// repository.
	manFilename string
	// log receives the informational and error messages of the fetcher.
	log logger.Logger
}
    42  
    43  func newGitFetcher(manFilename string, log logger.Logger) *gitFetcher {
    44  	return &gitFetcher{
    45  		manFilename: manFilename,
    46  		log:         log,
    47  	}
    48  }
    49  
// ManifestClient is the HTTP client used by the git fetcher to download
// manifests. It is exported for testing purposes only.
var ManifestClient = &http.Client{
	Timeout: 60 * time.Second,
}
    55  
    56  func isGithub(src *url.URL) bool {
    57  	return src.Host == "github.com"
    58  }
    59  
    60  func isGitlab(src *url.URL) bool {
    61  	return src.Host == "framagit.org" ||
    62  		src.Host == "forge.grandlyon.com" ||
    63  		strings.Contains(src.Host, "gitlab")
    64  }
    65  
    66  func (g *gitFetcher) FetchManifest(src *url.URL) (r io.ReadCloser, err error) {
    67  	defer func() {
    68  		if err != nil {
    69  			g.log.Errorf("Error while fetching app manifest %s: %s",
    70  				src.String(), err.Error())
    71  		}
    72  	}()
    73  
    74  	if isGitSSHScheme(src.Scheme) {
    75  		return g.fetchManifestFromGitArchive(src)
    76  	}
    77  
    78  	var u string
    79  	if isGithub(src) {
    80  		u, err = resolveGithubURL(src, g.manFilename)
    81  	} else if isGitlab(src) {
    82  		u, err = resolveGitlabURL(src, g.manFilename)
    83  	} else {
    84  		u, err = resolveManifestURL(src, g.manFilename)
    85  	}
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	g.log.Infof("Fetching manifest on %s", u)
    91  	res, err := ManifestClient.Get(u)
    92  	if err != nil || res.StatusCode != 200 {
    93  		g.log.Errorf("Error while fetching manifest on %s", u)
    94  		return nil, ErrManifestNotReachable
    95  	}
    96  
    97  	return res.Body, nil
    98  }
    99  
   100  // Use the git archive method to download a manifest from the git repository.
   101  func (g *gitFetcher) fetchManifestFromGitArchive(src *url.URL) (io.ReadCloser, error) {
   102  	var branch string
   103  	src, branch = getRemoteURL(src)
   104  	ctx, cancel := context.WithTimeout(context.Background(), cloneTimeout)
   105  	defer cancel()
   106  
   107  	if branch == "" {
   108  		branch = "main"
   109  		handle, err := g.doFetchManifestFromGitArchive(src, branch, ctx)
   110  		if err == nil {
   111  			return handle, nil
   112  		}
   113  		branch = "master"
   114  	}
   115  	return g.doFetchManifestFromGitArchive(src, branch, ctx)
   116  }
   117  
   118  func (g *gitFetcher) doFetchManifestFromGitArchive(src *url.URL, branch string, ctx context.Context) (io.ReadCloser, error) {
   119  	cmd := exec.CommandContext(ctx, "git",
   120  		"archive",
   121  		"--remote", src.String(),
   122  		fmt.Sprintf("refs/heads/%s", branch),
   123  		g.manFilename)
   124  	g.log.Infof("Fetching manifest %s", strings.Join(cmd.Args, " "))
   125  	stdout, err := cmd.Output()
   126  	if err != nil {
   127  		if errors.Is(err, exec.ErrNotFound) {
   128  			return nil, ErrNotSupportedSource
   129  		}
   130  		return nil, ErrManifestNotReachable
   131  	}
   132  	buf := new(bytes.Buffer)
   133  	r := tar.NewReader(bytes.NewReader(stdout))
   134  	for {
   135  		h, err := r.Next()
   136  		if errors.Is(err, io.EOF) {
   137  			break
   138  		}
   139  		if err != nil {
   140  			return nil, ErrManifestNotReachable
   141  		}
   142  		if h.Name != g.manFilename {
   143  			continue
   144  		}
   145  		if _, err = io.Copy(buf, r); err != nil {
   146  			return nil, ErrManifestNotReachable
   147  		}
   148  		return io.NopCloser(buf), nil
   149  	}
   150  	return nil, ErrManifestNotReachable
   151  }
   152  
   153  func (g *gitFetcher) Fetch(src *url.URL, fs appfs.Copier, man Manifest) (err error) {
   154  	defer func() {
   155  		if err != nil {
   156  			g.log.Errorf("Error while fetching or copying repository %s: %s",
   157  				src.String(), err.Error())
   158  		}
   159  	}()
   160  
   161  	osFs := afero.NewOsFs()
   162  	gitDir, err := afero.TempDir(osFs, "", "cozy-app-"+man.Slug())
   163  	if err != nil {
   164  		return err
   165  	}
   166  	defer func() { _ = osFs.RemoveAll(gitDir) }()
   167  
   168  	gitFs := afero.NewBasePathFs(osFs, gitDir)
   169  	if src.Scheme == "git+https" {
   170  		src.Scheme = "https"
   171  	}
   172  	// XXX Gitlab doesn't support the git protocol
   173  	if src.Scheme == "git" && isGitlab(src) {
   174  		src.Scheme = "https"
   175  	}
   176  
   177  	// If the scheme uses ssh, we have to use the git command.
   178  	if isGitSSHScheme(src.Scheme) {
   179  		err = g.fetchWithGit(gitFs, gitDir, src, fs, man)
   180  		if errors.Is(err, exec.ErrNotFound) {
   181  			return ErrNotSupportedSource
   182  		}
   183  		return err
   184  	}
   185  
   186  	return g.fetchWithGit(gitFs, gitDir, src, fs, man)
   187  }
   188  
   189  func (g *gitFetcher) fetchWithGit(gitFs afero.Fs, gitDir string, src *url.URL, fs appfs.Copier, man Manifest) (err error) {
   190  	var branch string
   191  	src, branch = getRemoteURL(src)
   192  	srcStr := src.String()
   193  
   194  	// GitHub doesn't accept git ls-remote with unencrypted git protocol.
   195  	// Cf https://github.blog/2021-09-01-improving-git-protocol-security-github/
   196  	if isGithub(src) && src.Scheme == "git" {
   197  		srcStr = strings.Replace(srcStr, "git", "https", 1)
   198  	}
   199  
   200  	ctx, cancel := context.WithTimeout(context.Background(), cloneTimeout)
   201  	defer cancel()
   202  
   203  	if branch == "" {
   204  		branch = "main"
   205  		err := g.doFetchWithGit(gitFs, gitDir, srcStr, branch, fs, man, ctx)
   206  		if err == nil {
   207  			return nil
   208  		}
   209  		branch = "master"
   210  	}
   211  	return g.doFetchWithGit(gitFs, gitDir, srcStr, branch, fs, man, ctx)
   212  }
   213  
   214  func (g *gitFetcher) doFetchWithGit(
   215  	gitFs afero.Fs,
   216  	gitDir, srcStr, branch string,
   217  	fs appfs.Copier,
   218  	man Manifest,
   219  	ctx context.Context,
   220  ) (err error) {
   221  	// The first command we execute is a ls-remote to check the last commit from
   222  	// the remote branch and see if we already have a checked-out version of this
   223  	// tree.
   224  	cmd := exec.CommandContext(ctx, "git",
   225  		"ls-remote", "--quiet",
   226  		srcStr, fmt.Sprintf("refs/heads/%s", branch))
   227  	lsRemote, err := cmd.Output()
   228  	if err != nil {
   229  		if !errors.Is(err, exec.ErrNotFound) {
   230  			g.log.Errorf("ls-remote error of %s: %s",
   231  				strings.Join(cmd.Args, " "), err.Error())
   232  		}
   233  		return err
   234  	}
   235  
   236  	lsRemoteFields := bytes.Fields(lsRemote)
   237  	if len(lsRemoteFields) == 0 {
   238  		return fmt.Errorf("git: unexpected ls-remote output")
   239  	}
   240  
   241  	slug := man.Slug()
   242  	version := man.Version() + "-" + string(lsRemoteFields[0])
   243  
   244  	// The git fetcher needs to update the actual version of the application to
   245  	// reflect the git version of the repository.
   246  	man.SetVersion(version)
   247  
   248  	// If the application folder already exists, we can bail early.
   249  	exists, err := fs.Start(slug, version, "")
   250  	if err != nil || exists {
   251  		return err
   252  	}
   253  	defer func() {
   254  		if err != nil {
   255  			_ = fs.Abort()
   256  		} else {
   257  			err = fs.Commit()
   258  		}
   259  	}()
   260  
   261  	cmd = exec.CommandContext(ctx, "git",
   262  		"clone",
   263  		"--quiet",
   264  		"--depth", "1",
   265  		"--single-branch",
   266  		"--branch", branch,
   267  		"--", srcStr, gitDir)
   268  
   269  	g.log.Infof("Clone with git: %s", strings.Join(cmd.Args, " "))
   270  	stdoutStderr, err := cmd.CombinedOutput()
   271  	if err != nil {
   272  		if !errors.Is(err, exec.ErrNotFound) {
   273  			g.log.Errorf("Clone error of %s %s: %s", srcStr, stdoutStderr,
   274  				err.Error())
   275  		}
   276  		return err
   277  	}
   278  
   279  	return afero.Walk(gitFs, "/", func(path string, info os.FileInfo, err error) error {
   280  		if err != nil {
   281  			return err
   282  		}
   283  		if info.IsDir() {
   284  			if info.Name() == ".git" {
   285  				return filepath.SkipDir
   286  			}
   287  			return nil
   288  		}
   289  		src, err := gitFs.Open(path)
   290  		if err != nil {
   291  			return err
   292  		}
   293  		fileinfo := appfs.NewFileInfo(path, info.Size(), info.Mode())
   294  		return fs.Copy(fileinfo, src)
   295  	})
   296  }
   297  
   298  func getWebBranch(src *url.URL) string {
   299  	if src.Fragment != "" {
   300  		return src.Fragment
   301  	}
   302  	return "HEAD"
   303  }
   304  
   305  func getRemoteURL(src *url.URL) (*url.URL, string) {
   306  	branch := src.Fragment
   307  	clonedSrc := *src
   308  	clonedSrc.Fragment = ""
   309  	return &clonedSrc, branch
   310  }
   311  
   312  func resolveGithubURL(src *url.URL, filename string) (string, error) {
   313  	match := ghURLRegex.FindStringSubmatch(src.Path)
   314  	if len(match) != 3 {
   315  		return "", &url.Error{
   316  			Op:  "parsepath",
   317  			URL: src.String(),
   318  			Err: errors.New("Could not parse url git path"),
   319  		}
   320  	}
   321  
   322  	user, project := match[1], match[2]
   323  	branch := getWebBranch(src)
   324  
   325  	u := fmt.Sprintf(ghRawManifestURL, user, project, branch, filename)
   326  	return u, nil
   327  }
   328  
   329  func resolveGitlabURL(src *url.URL, filename string) (string, error) {
   330  	match := glURLRegex.FindStringSubmatch(src.Path)
   331  	if len(match) != 3 {
   332  		return "", &url.Error{
   333  			Op:  "parsepath",
   334  			URL: src.String(),
   335  			Err: errors.New("Could not parse url git path"),
   336  		}
   337  	}
   338  
   339  	user, project := match[1], match[2]
   340  	branch := getWebBranch(src)
   341  
   342  	u := fmt.Sprintf(glRawManifestURL, src.Host, user, project, branch, filename)
   343  	return u, nil
   344  }
   345  
   346  func resolveManifestURL(src *url.URL, filename string) (string, error) {
   347  	srccopy, _ := url.Parse(src.String())
   348  	srccopy.Scheme = "https"
   349  	if srccopy.Path == "" || srccopy.Path[len(srccopy.Path)-1] != '/' {
   350  		srccopy.Path += "/"
   351  	}
   352  	srccopy.Path += filename
   353  	return srccopy.String(), nil
   354  }
   355  
   356  func isGitSSHScheme(scheme string) bool {
   357  	return scheme == "git+ssh" || scheme == "ssh+git"
   358  }
   359  
// Compile-time check that *gitFetcher implements the Fetcher interface.
var _ Fetcher = &gitFetcher{}