github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/model/app/fetcher_git.go (about) 1 package app 2 3 import ( 4 "archive/tar" 5 "bytes" 6 "context" 7 "errors" 8 "fmt" 9 "io" 10 "net/http" 11 "net/url" 12 "os" 13 "os/exec" 14 "path/filepath" 15 "regexp" 16 "strings" 17 "time" 18 19 "github.com/cozy/cozy-stack/pkg/appfs" 20 "github.com/cozy/cozy-stack/pkg/logger" 21 "github.com/spf13/afero" 22 ) 23 24 var cloneTimeout = 30 * time.Second 25 26 const ( 27 ghRawManifestURL = "https://raw.githubusercontent.com/%s/%s/%s/%s" 28 glRawManifestURL = "https://%s/%s/%s/raw/%s/%s" 29 ) 30 31 var ( 32 // ghURLRegex is used to identify github 33 ghURLRegex = regexp.MustCompile(`/([^/]+)/([^/]+).git`) 34 // glURLRegex is used to identify gitlab 35 glURLRegex = regexp.MustCompile(`/(.+)/([^/]+).git`) 36 ) 37 38 type gitFetcher struct { 39 manFilename string 40 log logger.Logger 41 } 42 43 func newGitFetcher(manFilename string, log logger.Logger) *gitFetcher { 44 return &gitFetcher{ 45 manFilename: manFilename, 46 log: log, 47 } 48 } 49 50 // ManifestClient is the client used to HTTP resources from the git fetcher. It 51 // is exported for tests purposes only. 52 var ManifestClient = &http.Client{ 53 Timeout: 60 * time.Second, 54 } 55 56 func isGithub(src *url.URL) bool { 57 return src.Host == "github.com" 58 } 59 60 func isGitlab(src *url.URL) bool { 61 return src.Host == "framagit.org" || 62 src.Host == "forge.grandlyon.com" || 63 strings.Contains(src.Host, "gitlab") 64 } 65 66 func (g *gitFetcher) FetchManifest(src *url.URL) (r io.ReadCloser, err error) { 67 defer func() { 68 if err != nil { 69 g.log.Errorf("Error while fetching app manifest %s: %s", 70 src.String(), err.Error()) 71 } 72 }() 73 74 if isGitSSHScheme(src.Scheme) { 75 return g.fetchManifestFromGitArchive(src) 76 } 77 78 var u string 79 if isGithub(src) { 80 u, err = resolveGithubURL(src, g.manFilename) 81 } else if isGitlab(src) { 82 u, err = resolveGitlabURL(src, g.manFilename) 83 } else { 84 u, err = resolveManifestURL(src, g.manFilename) 85 } 86 if err != nil { 87 return nil, err 88 } 89 90 g.log.Infof("Fetching manifest on %s", u) 91 res, err := ManifestClient.Get(u) 92 if err != nil || res.StatusCode != 200 { 93 g.log.Errorf("Error while fetching manifest on %s", u) 94 return nil, ErrManifestNotReachable 95 } 96 97 return res.Body, nil 98 } 99 100 // Use the git archive method to download a manifest from the git repository. 101 func (g *gitFetcher) fetchManifestFromGitArchive(src *url.URL) (io.ReadCloser, error) { 102 var branch string 103 src, branch = getRemoteURL(src) 104 ctx, cancel := context.WithTimeout(context.Background(), cloneTimeout) 105 defer cancel() 106 107 if branch == "" { 108 branch = "main" 109 handle, err := g.doFetchManifestFromGitArchive(src, branch, ctx) 110 if err == nil { 111 return handle, nil 112 } 113 branch = "master" 114 } 115 return g.doFetchManifestFromGitArchive(src, branch, ctx) 116 } 117 118 func (g *gitFetcher) doFetchManifestFromGitArchive(src *url.URL, branch string, ctx context.Context) (io.ReadCloser, error) { 119 cmd := exec.CommandContext(ctx, "git", 120 "archive", 121 "--remote", src.String(), 122 fmt.Sprintf("refs/heads/%s", branch), 123 g.manFilename) 124 g.log.Infof("Fetching manifest %s", strings.Join(cmd.Args, " ")) 125 stdout, err := cmd.Output() 126 if err != nil { 127 if errors.Is(err, exec.ErrNotFound) { 128 return nil, ErrNotSupportedSource 129 } 130 return nil, ErrManifestNotReachable 131 } 132 buf := new(bytes.Buffer) 133 r := tar.NewReader(bytes.NewReader(stdout)) 134 for { 135 h, err := r.Next() 136 if errors.Is(err, io.EOF) { 137 break 138 } 139 if err != nil { 140 return nil, ErrManifestNotReachable 141 } 142 if h.Name != g.manFilename { 143 continue 144 } 145 if _, err = io.Copy(buf, r); err != nil { 146 return nil, ErrManifestNotReachable 147 } 148 return io.NopCloser(buf), nil 149 } 150 return nil, ErrManifestNotReachable 151 } 152 153 func (g *gitFetcher) Fetch(src *url.URL, fs appfs.Copier, man Manifest) (err error) { 154 defer func() { 155 if err != nil { 156 g.log.Errorf("Error while fetching or copying repository %s: %s", 157 src.String(), err.Error()) 158 } 159 }() 160 161 osFs := afero.NewOsFs() 162 gitDir, err := afero.TempDir(osFs, "", "cozy-app-"+man.Slug()) 163 if err != nil { 164 return err 165 } 166 defer func() { _ = osFs.RemoveAll(gitDir) }() 167 168 gitFs := afero.NewBasePathFs(osFs, gitDir) 169 if src.Scheme == "git+https" { 170 src.Scheme = "https" 171 } 172 // XXX Gitlab doesn't support the git protocol 173 if src.Scheme == "git" && isGitlab(src) { 174 src.Scheme = "https" 175 } 176 177 // If the scheme uses ssh, we have to use the git command. 178 if isGitSSHScheme(src.Scheme) { 179 err = g.fetchWithGit(gitFs, gitDir, src, fs, man) 180 if errors.Is(err, exec.ErrNotFound) { 181 return ErrNotSupportedSource 182 } 183 return err 184 } 185 186 return g.fetchWithGit(gitFs, gitDir, src, fs, man) 187 } 188 189 func (g *gitFetcher) fetchWithGit(gitFs afero.Fs, gitDir string, src *url.URL, fs appfs.Copier, man Manifest) (err error) { 190 var branch string 191 src, branch = getRemoteURL(src) 192 srcStr := src.String() 193 194 // GitHub doesn't accept git ls-remote with unencrypted git protocol. 195 // Cf https://github.blog/2021-09-01-improving-git-protocol-security-github/ 196 if isGithub(src) && src.Scheme == "git" { 197 srcStr = strings.Replace(srcStr, "git", "https", 1) 198 } 199 200 ctx, cancel := context.WithTimeout(context.Background(), cloneTimeout) 201 defer cancel() 202 203 if branch == "" { 204 branch = "main" 205 err := g.doFetchWithGit(gitFs, gitDir, srcStr, branch, fs, man, ctx) 206 if err == nil { 207 return nil 208 } 209 branch = "master" 210 } 211 return g.doFetchWithGit(gitFs, gitDir, srcStr, branch, fs, man, ctx) 212 } 213 214 func (g *gitFetcher) doFetchWithGit( 215 gitFs afero.Fs, 216 gitDir, srcStr, branch string, 217 fs appfs.Copier, 218 man Manifest, 219 ctx context.Context, 220 ) (err error) { 221 // The first command we execute is a ls-remote to check the last commit from 222 // the remote branch and see if we already have a checked-out version of this 223 // tree. 224 cmd := exec.CommandContext(ctx, "git", 225 "ls-remote", "--quiet", 226 srcStr, fmt.Sprintf("refs/heads/%s", branch)) 227 lsRemote, err := cmd.Output() 228 if err != nil { 229 if !errors.Is(err, exec.ErrNotFound) { 230 g.log.Errorf("ls-remote error of %s: %s", 231 strings.Join(cmd.Args, " "), err.Error()) 232 } 233 return err 234 } 235 236 lsRemoteFields := bytes.Fields(lsRemote) 237 if len(lsRemoteFields) == 0 { 238 return fmt.Errorf("git: unexpected ls-remote output") 239 } 240 241 slug := man.Slug() 242 version := man.Version() + "-" + string(lsRemoteFields[0]) 243 244 // The git fetcher needs to update the actual version of the application to 245 // reflect the git version of the repository. 246 man.SetVersion(version) 247 248 // If the application folder already exists, we can bail early. 249 exists, err := fs.Start(slug, version, "") 250 if err != nil || exists { 251 return err 252 } 253 defer func() { 254 if err != nil { 255 _ = fs.Abort() 256 } else { 257 err = fs.Commit() 258 } 259 }() 260 261 cmd = exec.CommandContext(ctx, "git", 262 "clone", 263 "--quiet", 264 "--depth", "1", 265 "--single-branch", 266 "--branch", branch, 267 "--", srcStr, gitDir) 268 269 g.log.Infof("Clone with git: %s", strings.Join(cmd.Args, " ")) 270 stdoutStderr, err := cmd.CombinedOutput() 271 if err != nil { 272 if !errors.Is(err, exec.ErrNotFound) { 273 g.log.Errorf("Clone error of %s %s: %s", srcStr, stdoutStderr, 274 err.Error()) 275 } 276 return err 277 } 278 279 return afero.Walk(gitFs, "/", func(path string, info os.FileInfo, err error) error { 280 if err != nil { 281 return err 282 } 283 if info.IsDir() { 284 if info.Name() == ".git" { 285 return filepath.SkipDir 286 } 287 return nil 288 } 289 src, err := gitFs.Open(path) 290 if err != nil { 291 return err 292 } 293 fileinfo := appfs.NewFileInfo(path, info.Size(), info.Mode()) 294 return fs.Copy(fileinfo, src) 295 }) 296 } 297 298 func getWebBranch(src *url.URL) string { 299 if src.Fragment != "" { 300 return src.Fragment 301 } 302 return "HEAD" 303 } 304 305 func getRemoteURL(src *url.URL) (*url.URL, string) { 306 branch := src.Fragment 307 clonedSrc := *src 308 clonedSrc.Fragment = "" 309 return &clonedSrc, branch 310 } 311 312 func resolveGithubURL(src *url.URL, filename string) (string, error) { 313 match := ghURLRegex.FindStringSubmatch(src.Path) 314 if len(match) != 3 { 315 return "", &url.Error{ 316 Op: "parsepath", 317 URL: src.String(), 318 Err: errors.New("Could not parse url git path"), 319 } 320 } 321 322 user, project := match[1], match[2] 323 branch := getWebBranch(src) 324 325 u := fmt.Sprintf(ghRawManifestURL, user, project, branch, filename) 326 return u, nil 327 } 328 329 func resolveGitlabURL(src *url.URL, filename string) (string, error) { 330 match := glURLRegex.FindStringSubmatch(src.Path) 331 if len(match) != 3 { 332 return "", &url.Error{ 333 Op: "parsepath", 334 URL: src.String(), 335 Err: errors.New("Could not parse url git path"), 336 } 337 } 338 339 user, project := match[1], match[2] 340 branch := getWebBranch(src) 341 342 u := fmt.Sprintf(glRawManifestURL, src.Host, user, project, branch, filename) 343 return u, nil 344 } 345 346 func resolveManifestURL(src *url.URL, filename string) (string, error) { 347 srccopy, _ := url.Parse(src.String()) 348 srccopy.Scheme = "https" 349 if srccopy.Path == "" || srccopy.Path[len(srccopy.Path)-1] != '/' { 350 srccopy.Path += "/" 351 } 352 srccopy.Path += filename 353 return srccopy.String(), nil 354 } 355 356 func isGitSSHScheme(scheme string) bool { 357 return scheme == "git+ssh" || scheme == "ssh+git" 358 } 359 360 var _ Fetcher = &gitFetcher{}