github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/cli/fetch.go (about) 1 package cli 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "net/http" 8 "net/url" 9 "os" 10 "os/exec" 11 "path/filepath" 12 "strconv" 13 "strings" 14 "sync" 15 16 humanize "github.com/dustin/go-humanize" 17 "github.com/evergreen-ci/evergreen" 18 "github.com/evergreen-ci/evergreen/model" 19 "github.com/evergreen-ci/evergreen/service" 20 "github.com/evergreen-ci/evergreen/util" 21 "github.com/pkg/errors" 22 ) 23 24 const defaultCloneDepth = 500 25 26 // FetchCommand is used to fetch the source or artifacts associated with a task. 27 type FetchCommand struct { 28 GlobalOpts *Options `no-flag:"true"` 29 30 Source bool `long:"source" description:"clones the source for the given task"` 31 Artifacts bool `long:"artifacts" description:"fetch artifacts for the task and all its recursive dependents"` 32 Shallow bool `long:"shallow" description:"don't recursively download artifacts from dependency tasks"` 33 NoPatch bool `long:"no-patch" description:"when using --source with a patch task, skip applying the patch"` 34 Dir string `long:"dir" description:"root directory to fetch artifacts into. defaults to current working directory"` 35 TaskId string `short:"t" long:"task" description:"task associated with the data to fetch" required:"true"` 36 } 37 38 // FetchCommand allows the user to download the artifacts for a task (and optionally its dependencies), 39 // clone the source that a task was derived from, or both. 40 func (fc *FetchCommand) Execute(_ []string) error { 41 ac, rc, _, err := getAPIClients(fc.GlobalOpts) 42 if err != nil { 43 return err 44 } 45 notifyUserUpdate(ac) 46 47 wd := fc.Dir 48 if len(wd) == 0 { 49 wd, err = os.Getwd() 50 if err != nil { 51 return err 52 } 53 } 54 55 if len(fc.TaskId) == 0 { 56 return errors.Errorf("must specify a task ID with -t.") 57 } 58 59 if !fc.Source && !fc.Artifacts { 60 return errors.New("must specify at least one of either --artifacts or --source.") 61 } 62 if fc.Source { 63 err = fetchSource(ac, rc, wd, fc.TaskId, fc.NoPatch) 64 if err != nil { 65 return err 66 } 67 } 68 if fc.Artifacts { 69 err = fetchArtifacts(rc, fc.TaskId, wd, fc.Shallow) 70 if err != nil { 71 return err 72 } 73 } 74 return nil 75 } 76 77 func fetchSource(ac, rc *APIClient, rootPath, taskId string, noPatch bool) error { 78 task, err := rc.GetTask(taskId) 79 if err != nil { 80 return err 81 } 82 if task == nil { 83 return errors.New("task not found.") 84 } 85 86 config, err := rc.GetConfig(task.Version) 87 if err != nil { 88 return err 89 } 90 91 project, err := ac.GetProjectRef(task.Project) 92 if err != nil { 93 return err 94 } 95 96 cloneDir := util.CleanForPath(fmt.Sprintf("source-%v", task.Project)) 97 var patch *service.RestPatch 98 if task.Requester == evergreen.PatchVersionRequester { 99 cloneDir = util.CleanForPath(fmt.Sprintf("source-patch-%v_%v", task.PatchNumber, task.Project)) 100 patch, err = rc.GetPatch(task.PatchId) 101 if err != nil { 102 return err 103 } 104 } else { 105 if len(task.Revision) >= 5 { 106 cloneDir = util.CleanForPath(fmt.Sprintf("source-%v-%v", task.Project, task.Revision[0:6])) 107 } 108 } 109 cloneDir = filepath.Join(rootPath, cloneDir) 110 111 err = cloneSource(task, project, config, cloneDir) 112 if err != nil { 113 return err 114 } 115 if patch != nil && !noPatch { 116 err = applyPatch(patch, cloneDir, config, config.FindBuildVariant(task.BuildVariant)) 117 if err != nil { 118 return err 119 } 120 } 121 122 return nil 123 } 124 125 type cloneOptions struct { 126 repo string 127 revision string 128 rootDir string 129 depth uint 130 } 131 132 func clone(opts cloneOptions, verbose bool) error { 133 // clone the repo first 134 cloneArgs := []string{"clone", opts.repo} 135 if opts.depth > 0 { 136 cloneArgs = append(cloneArgs, "--depth", fmt.Sprintf("%d", opts.depth)) 137 } 138 139 cloneArgs = append(cloneArgs, opts.rootDir) 140 if verbose { 141 fmt.Println("Executing git", strings.Join(cloneArgs, " ")) 142 } 143 c := exec.Command("git", cloneArgs...) 144 c.Stdout, c.Stderr = os.Stdout, os.Stderr 145 err := c.Run() 146 if err != nil { 147 return err 148 } 149 150 // try to check out the revision we want 151 checkoutArgs := []string{"checkout", opts.revision} 152 if verbose { 153 fmt.Println("Executing git", strings.Join(checkoutArgs, " ")) 154 } 155 c = exec.Command("git", checkoutArgs...) 156 stdoutBuf, stderrBuf := &bytes.Buffer{}, &bytes.Buffer{} 157 c.Stdout = io.MultiWriter(os.Stdout, stdoutBuf) 158 c.Stderr = io.MultiWriter(os.Stderr, stderrBuf) 159 c.Dir = opts.rootDir 160 err = c.Run() 161 if err != nil { 162 if !bytes.Contains(stderrBuf.Bytes(), []byte("reference is not a tree:")) { 163 return err 164 } 165 166 // we have to go deeper 167 fetchArgs := []string{"fetch", "--unshallow"} 168 if verbose { 169 fmt.Println("Executing git", strings.Join(fetchArgs, " ")) 170 } 171 c = exec.Command("git", fetchArgs...) 172 c.Stdout, c.Stderr, c.Dir = os.Stdout, os.Stderr, opts.rootDir 173 err = c.Run() 174 if err != nil { 175 return err 176 } 177 // now it's unshallow, so try again to check it out 178 checkoutRetryArgs := []string{"checkout", opts.revision} 179 if verbose { 180 fmt.Println("Executing git", strings.Join(checkoutRetryArgs, " ")) 181 } 182 c = exec.Command("git", checkoutRetryArgs...) 183 c.Stdout, c.Stderr, c.Dir = os.Stdout, os.Stderr, opts.rootDir 184 return c.Run() 185 } 186 return nil 187 } 188 189 func cloneSource(task *service.RestTask, project *model.ProjectRef, config *model.Project, cloneDir string) error { 190 // Fetch the outermost repo for the task 191 err := clone( 192 cloneOptions{ 193 repo: fmt.Sprintf("git@github.com:%v/%v.git", project.Owner, project.Repo), 194 revision: task.Revision, 195 rootDir: cloneDir, 196 depth: defaultCloneDepth, 197 }, 198 false, 199 ) 200 201 if err != nil { 202 return err 203 } 204 205 // Then fetch each of the modules 206 variant := config.FindBuildVariant(task.BuildVariant) 207 if variant == nil { 208 return errors.Errorf("couldn't find build variant '%v' in config", task.BuildVariant) 209 } 210 for _, moduleName := range variant.Modules { 211 module, err := config.GetModuleByName(moduleName) 212 if err != nil || module == nil { 213 return errors.Errorf("variant refers to a module '%v' that doesn't exist.", moduleName) 214 } 215 moduleBase := filepath.Join(cloneDir, module.Prefix, module.Name) 216 fmt.Printf("Fetching module %v at %v\n", moduleName, module.Branch) 217 err = clone(cloneOptions{ 218 repo: module.Repo, 219 revision: module.Branch, 220 rootDir: filepath.ToSlash(moduleBase), 221 }, false) 222 if err != nil { 223 return err 224 } 225 } 226 return nil 227 } 228 229 func applyPatch(patch *service.RestPatch, rootCloneDir string, conf *model.Project, variant *model.BuildVariant) error { 230 // patch sets and contain multiple patches, some of them for modules 231 for _, patchPart := range patch.Patches { 232 var dir string 233 if patchPart.ModuleName == "" { 234 // if patch is not part of a module, just apply patch against src root 235 dir = rootCloneDir 236 } else { 237 fmt.Println("Applying patches for module", patchPart.ModuleName) 238 // if patch is part of a module, apply patch in module root 239 module, err := conf.GetModuleByName(patchPart.ModuleName) 240 if err != nil || module == nil { 241 return errors.Errorf("can't find module %v: %v", patchPart.ModuleName, err) 242 } 243 244 // skip the module if this build variant does not use it 245 if !util.SliceContains(variant.Modules, module.Name) { 246 continue 247 } 248 249 dir = filepath.Join(rootCloneDir, module.Prefix, module.Name) 250 } 251 252 args := []string{"apply", "--whitespace=fix"} 253 applyCmd := exec.Command("git", args...) 254 applyCmd.Stdout, applyCmd.Stderr, applyCmd.Dir = os.Stdout, os.Stderr, dir 255 applyCmd.Stdin = bytes.NewReader([]byte(patchPart.PatchSet.Patch)) 256 err := applyCmd.Run() 257 if err != nil { 258 return err 259 } 260 } 261 return nil 262 } 263 264 func fetchArtifacts(rc *APIClient, taskId string, rootDir string, shallow bool) error { 265 task, err := rc.GetTask(taskId) 266 if err != nil { 267 return errors.Wrapf(err, "problem getting task for %s", taskId) 268 } 269 if task == nil { 270 return errors.New("task not found") 271 } 272 273 urls, err := getUrlsChannel(rc, task, shallow) 274 if err != nil { 275 return errors.WithStack(err) 276 } 277 278 return errors.Wrapf(downloadUrls(rootDir, urls, 4), 279 "problem downloading artifacts for task %s", taskId) 280 } 281 282 // searchDependencies does a depth-first search of the dependencies of the "seed" task, returning 283 // a list of all tasks related to it in the dependency graph. It performs this by doing successive 284 // calls to the API to crawl the graph, keeping track of any already-processed tasks in the "found" 285 // map. 286 func searchDependencies(rc *APIClient, seed *service.RestTask, found map[string]bool) ([]*service.RestTask, error) { 287 out := []*service.RestTask{} 288 for _, dep := range seed.DependsOn { 289 if _, ok := found[dep.TaskId]; ok { 290 continue 291 } 292 t, err := rc.GetTask(dep.TaskId) 293 if err != nil { 294 return nil, err 295 } 296 if t != nil { 297 found[t.Id] = true 298 out = append(out, t) 299 more, err := searchDependencies(rc, t, found) 300 if err != nil { 301 return nil, err 302 } 303 out = append(out, more...) 304 for _, d := range more { 305 found[d.Id] = true 306 } 307 } 308 } 309 return out, nil 310 } 311 312 type artifactDownload struct { 313 url string 314 path string 315 } 316 317 func getArtifactFolderName(task *service.RestTask) string { 318 if task.Requester == evergreen.PatchVersionRequester { 319 return fmt.Sprintf("artifacts-patch-%v_%v_%v", task.PatchNumber, task.BuildVariant, task.DisplayName) 320 } 321 322 if len(task.Revision) >= 5 { 323 return fmt.Sprintf("artifacts-%v-%v_%v", task.Revision[0:6], task.BuildVariant, task.DisplayName) 324 } 325 return fmt.Sprintf("artifacts-%v_%v", task.BuildVariant, task.DisplayName) 326 } 327 328 // getUrlsChannel takes a seed task, and returns a channel that streams all of the artifacts 329 // associated with the task and its dependencies. If "shallow" is set, only artifacts from the seed 330 // task will be streamed. 331 func getUrlsChannel(rc *APIClient, seed *service.RestTask, shallow bool) (chan artifactDownload, error) { 332 allTasks := []*service.RestTask{seed} 333 if !shallow { 334 fmt.Printf("Gathering dependencies... ") 335 deps, err := searchDependencies(rc, seed, map[string]bool{}) 336 if err != nil { 337 return nil, err 338 } 339 allTasks = append(allTasks, deps...) 340 } 341 fmt.Printf("Done.\n") 342 343 urls := make(chan artifactDownload) 344 go func() { 345 for _, t := range allTasks { 346 for _, f := range t.Files { 347 directoryName := getArtifactFolderName(t) 348 urls <- artifactDownload{f.URL, directoryName} 349 } 350 } 351 close(urls) 352 }() 353 return urls, nil 354 } 355 356 func fileNameWithIndex(filename string, index int) string { 357 if index-1 == 0 { 358 return filename 359 } 360 parts := strings.Split(filename, ".") 361 // If the file has no extension, just append the number with _ 362 if len(parts) == 1 { 363 return fmt.Sprintf("%s_(%d)", filename, index-1) 364 } 365 // If the file has an extension, add _N (index) just before the extension. 366 return fmt.Sprintf("%s_(%d).%s", parts[0], index-1, strings.Join(parts[1:], ".")) 367 } 368 369 // downloadUrls pulls a set of artifacts from the given channel and downloads them, using up to 370 // the given number of workers in parallel. The given root directory determines the base location 371 // where all the artifact files will be downloaded to. 372 func downloadUrls(root string, urls chan artifactDownload, workers int) error { 373 if workers <= 0 { 374 panic("invalid workers count") 375 } 376 wg := sync.WaitGroup{} 377 errs := make(chan error) 378 wg.Add(workers) 379 380 // Keep track of filenames being downloaded, so that if there are collisions, we can detect 381 // and re-name the file to something else. 382 fileNamesUsed := struct { 383 nameCounts map[string]int 384 sync.Mutex 385 }{nameCounts: map[string]int{}} 386 387 for i := 0; i < workers; i++ { 388 go func(workerId int) { 389 defer wg.Done() 390 counter := 0 391 for u := range urls { 392 393 // Try to determinate the file location for the output. 394 folder := filepath.Join(root, u.path) 395 // As a backup plan in case we can't figure out the file name from the URL, 396 // the file name will just be named after the worker ID and file index. 397 justFile := fmt.Sprintf("%v_%v", workerId, counter) 398 parsedUrl, err := url.Parse(u.url) 399 if err == nil { 400 // under normal operation, the file name written to disk will match the name 401 // of the file in the URL. For instance, http://www.website.com/file.tgz 402 // will assume "file.tgz". 403 pathParts := strings.Split(parsedUrl.Path, "/") 404 if len(pathParts) >= 1 { 405 justFile = util.CleanForPath(pathParts[len(pathParts)-1]) 406 } 407 } 408 409 fileName := filepath.Join(folder, justFile) 410 fileNamesUsed.Lock() 411 for { 412 fileNamesUsed.nameCounts[fileName] += 1 413 testFileName := fileNameWithIndex(fileName, fileNamesUsed.nameCounts[fileName]) 414 _, err = os.Stat(testFileName) 415 if err != nil { 416 if os.IsNotExist(err) { 417 // we found a file name to safely create without collisions.. 418 fileName = testFileName 419 break 420 } 421 // something else went wrong. 422 errs <- errors.Errorf("failed to check if file exists: %v", err) 423 return 424 } 425 } 426 427 fileNamesUsed.Unlock() 428 429 err = os.MkdirAll(folder, 0777) 430 if err != nil { 431 errs <- errors.Errorf("Couldn't create output directory %v: %v", folder, err) 432 continue 433 } 434 435 out, err := os.Create(fileName) 436 if err != nil { 437 errs <- errors.Errorf("Couldn't download %v: %v", u.url, err) 438 continue 439 } 440 defer out.Close() 441 resp, err := http.Get(u.url) 442 if err != nil { 443 errs <- errors.Errorf("Couldn't download %v: %v", u.url, err) 444 continue 445 } 446 defer resp.Body.Close() 447 448 // If we can get the info, determine the file size so that the human can get an 449 // idea of how long the file might take to download. 450 // TODO: progress bars. 451 length, _ := strconv.Atoi(resp.Header.Get("Content-Length")) 452 sizeLog := "" 453 if length > 0 { 454 sizeLog = fmt.Sprintf(" (%s)", humanize.Bytes(uint64(length))) 455 } 456 457 justFile = filepath.Base(fileName) 458 fmt.Printf("(worker %v) Downloading %v to directory %s%s\n", workerId, justFile, u.path, sizeLog) 459 //sizeTracker := util.SizeTrackingReader{0, resp.Body} 460 _, err = io.Copy(out, resp.Body) 461 if err != nil { 462 errs <- errors.Errorf("Couldn't download %v: %v", u.url, err) 463 continue 464 } 465 counter++ 466 } 467 }(i) 468 } 469 470 done := make(chan struct{}) 471 var hasErrors error 472 go func() { 473 defer close(done) 474 for e := range errs { 475 hasErrors = errors.New("some files could not be downloaded successfully") 476 fmt.Println("error: ", e) 477 } 478 }() 479 wg.Wait() 480 close(errs) 481 <-done 482 483 return hasErrors 484 }