github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/cli/fetch.go (about)

     1  package cli
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"net/http"
     8  	"net/url"
     9  	"os"
    10  	"os/exec"
    11  	"path/filepath"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  
    16  	humanize "github.com/dustin/go-humanize"
    17  	"github.com/evergreen-ci/evergreen"
    18  	"github.com/evergreen-ci/evergreen/model"
    19  	"github.com/evergreen-ci/evergreen/service"
    20  	"github.com/evergreen-ci/evergreen/util"
    21  	"github.com/pkg/errors"
    22  )
    23  
// defaultCloneDepth is the history depth passed to "git clone --depth" when
// cloning the top-level repository for a task.
const defaultCloneDepth = 500
    25  
    26  // FetchCommand is used to fetch the source or artifacts associated with a task.
    27  type FetchCommand struct {
    28  	GlobalOpts *Options `no-flag:"true"`
    29  
    30  	Source    bool   `long:"source" description:"clones the source for the given task"`
    31  	Artifacts bool   `long:"artifacts" description:"fetch artifacts for the task and all its recursive dependents"`
    32  	Shallow   bool   `long:"shallow" description:"don't recursively download artifacts from dependency tasks"`
    33  	NoPatch   bool   `long:"no-patch" description:"when using --source with a patch task, skip applying the patch"`
    34  	Dir       string `long:"dir" description:"root directory to fetch artifacts into. defaults to current working directory"`
    35  	TaskId    string `short:"t" long:"task" description:"task associated with the data to fetch" required:"true"`
    36  }
    37  
    38  // FetchCommand allows the user to download the artifacts for a task (and optionally its dependencies),
    39  // clone the source that a task was derived from, or both.
    40  func (fc *FetchCommand) Execute(_ []string) error {
    41  	ac, rc, _, err := getAPIClients(fc.GlobalOpts)
    42  	if err != nil {
    43  		return err
    44  	}
    45  	notifyUserUpdate(ac)
    46  
    47  	wd := fc.Dir
    48  	if len(wd) == 0 {
    49  		wd, err = os.Getwd()
    50  		if err != nil {
    51  			return err
    52  		}
    53  	}
    54  
    55  	if len(fc.TaskId) == 0 {
    56  		return errors.Errorf("must specify a task ID with -t.")
    57  	}
    58  
    59  	if !fc.Source && !fc.Artifacts {
    60  		return errors.New("must specify at least one of either --artifacts or --source.")
    61  	}
    62  	if fc.Source {
    63  		err = fetchSource(ac, rc, wd, fc.TaskId, fc.NoPatch)
    64  		if err != nil {
    65  			return err
    66  		}
    67  	}
    68  	if fc.Artifacts {
    69  		err = fetchArtifacts(rc, fc.TaskId, wd, fc.Shallow)
    70  		if err != nil {
    71  			return err
    72  		}
    73  	}
    74  	return nil
    75  }
    76  
    77  func fetchSource(ac, rc *APIClient, rootPath, taskId string, noPatch bool) error {
    78  	task, err := rc.GetTask(taskId)
    79  	if err != nil {
    80  		return err
    81  	}
    82  	if task == nil {
    83  		return errors.New("task not found.")
    84  	}
    85  
    86  	config, err := rc.GetConfig(task.Version)
    87  	if err != nil {
    88  		return err
    89  	}
    90  
    91  	project, err := ac.GetProjectRef(task.Project)
    92  	if err != nil {
    93  		return err
    94  	}
    95  
    96  	cloneDir := util.CleanForPath(fmt.Sprintf("source-%v", task.Project))
    97  	var patch *service.RestPatch
    98  	if task.Requester == evergreen.PatchVersionRequester {
    99  		cloneDir = util.CleanForPath(fmt.Sprintf("source-patch-%v_%v", task.PatchNumber, task.Project))
   100  		patch, err = rc.GetPatch(task.PatchId)
   101  		if err != nil {
   102  			return err
   103  		}
   104  	} else {
   105  		if len(task.Revision) >= 5 {
   106  			cloneDir = util.CleanForPath(fmt.Sprintf("source-%v-%v", task.Project, task.Revision[0:6]))
   107  		}
   108  	}
   109  	cloneDir = filepath.Join(rootPath, cloneDir)
   110  
   111  	err = cloneSource(task, project, config, cloneDir)
   112  	if err != nil {
   113  		return err
   114  	}
   115  	if patch != nil && !noPatch {
   116  		err = applyPatch(patch, cloneDir, config, config.FindBuildVariant(task.BuildVariant))
   117  		if err != nil {
   118  			return err
   119  		}
   120  	}
   121  
   122  	return nil
   123  }
   124  
   125  type cloneOptions struct {
   126  	repo     string
   127  	revision string
   128  	rootDir  string
   129  	depth    uint
   130  }
   131  
   132  func clone(opts cloneOptions, verbose bool) error {
   133  	// clone the repo first
   134  	cloneArgs := []string{"clone", opts.repo}
   135  	if opts.depth > 0 {
   136  		cloneArgs = append(cloneArgs, "--depth", fmt.Sprintf("%d", opts.depth))
   137  	}
   138  
   139  	cloneArgs = append(cloneArgs, opts.rootDir)
   140  	if verbose {
   141  		fmt.Println("Executing git", strings.Join(cloneArgs, " "))
   142  	}
   143  	c := exec.Command("git", cloneArgs...)
   144  	c.Stdout, c.Stderr = os.Stdout, os.Stderr
   145  	err := c.Run()
   146  	if err != nil {
   147  		return err
   148  	}
   149  
   150  	// try to check out the revision we want
   151  	checkoutArgs := []string{"checkout", opts.revision}
   152  	if verbose {
   153  		fmt.Println("Executing git", strings.Join(checkoutArgs, " "))
   154  	}
   155  	c = exec.Command("git", checkoutArgs...)
   156  	stdoutBuf, stderrBuf := &bytes.Buffer{}, &bytes.Buffer{}
   157  	c.Stdout = io.MultiWriter(os.Stdout, stdoutBuf)
   158  	c.Stderr = io.MultiWriter(os.Stderr, stderrBuf)
   159  	c.Dir = opts.rootDir
   160  	err = c.Run()
   161  	if err != nil {
   162  		if !bytes.Contains(stderrBuf.Bytes(), []byte("reference is not a tree:")) {
   163  			return err
   164  		}
   165  
   166  		// we have to go deeper
   167  		fetchArgs := []string{"fetch", "--unshallow"}
   168  		if verbose {
   169  			fmt.Println("Executing git", strings.Join(fetchArgs, " "))
   170  		}
   171  		c = exec.Command("git", fetchArgs...)
   172  		c.Stdout, c.Stderr, c.Dir = os.Stdout, os.Stderr, opts.rootDir
   173  		err = c.Run()
   174  		if err != nil {
   175  			return err
   176  		}
   177  		// now it's unshallow, so try again to check it out
   178  		checkoutRetryArgs := []string{"checkout", opts.revision}
   179  		if verbose {
   180  			fmt.Println("Executing git", strings.Join(checkoutRetryArgs, " "))
   181  		}
   182  		c = exec.Command("git", checkoutRetryArgs...)
   183  		c.Stdout, c.Stderr, c.Dir = os.Stdout, os.Stderr, opts.rootDir
   184  		return c.Run()
   185  	}
   186  	return nil
   187  }
   188  
   189  func cloneSource(task *service.RestTask, project *model.ProjectRef, config *model.Project, cloneDir string) error {
   190  	// Fetch the outermost repo for the task
   191  	err := clone(
   192  		cloneOptions{
   193  			repo:     fmt.Sprintf("git@github.com:%v/%v.git", project.Owner, project.Repo),
   194  			revision: task.Revision,
   195  			rootDir:  cloneDir,
   196  			depth:    defaultCloneDepth,
   197  		},
   198  		false,
   199  	)
   200  
   201  	if err != nil {
   202  		return err
   203  	}
   204  
   205  	// Then fetch each of the modules
   206  	variant := config.FindBuildVariant(task.BuildVariant)
   207  	if variant == nil {
   208  		return errors.Errorf("couldn't find build variant '%v' in config", task.BuildVariant)
   209  	}
   210  	for _, moduleName := range variant.Modules {
   211  		module, err := config.GetModuleByName(moduleName)
   212  		if err != nil || module == nil {
   213  			return errors.Errorf("variant refers to a module '%v' that doesn't exist.", moduleName)
   214  		}
   215  		moduleBase := filepath.Join(cloneDir, module.Prefix, module.Name)
   216  		fmt.Printf("Fetching module %v at %v\n", moduleName, module.Branch)
   217  		err = clone(cloneOptions{
   218  			repo:     module.Repo,
   219  			revision: module.Branch,
   220  			rootDir:  filepath.ToSlash(moduleBase),
   221  		}, false)
   222  		if err != nil {
   223  			return err
   224  		}
   225  	}
   226  	return nil
   227  }
   228  
   229  func applyPatch(patch *service.RestPatch, rootCloneDir string, conf *model.Project, variant *model.BuildVariant) error {
   230  	// patch sets and contain multiple patches, some of them for modules
   231  	for _, patchPart := range patch.Patches {
   232  		var dir string
   233  		if patchPart.ModuleName == "" {
   234  			// if patch is not part of a module, just apply patch against src root
   235  			dir = rootCloneDir
   236  		} else {
   237  			fmt.Println("Applying patches for module", patchPart.ModuleName)
   238  			// if patch is part of a module, apply patch in module root
   239  			module, err := conf.GetModuleByName(patchPart.ModuleName)
   240  			if err != nil || module == nil {
   241  				return errors.Errorf("can't find module %v: %v", patchPart.ModuleName, err)
   242  			}
   243  
   244  			// skip the module if this build variant does not use it
   245  			if !util.SliceContains(variant.Modules, module.Name) {
   246  				continue
   247  			}
   248  
   249  			dir = filepath.Join(rootCloneDir, module.Prefix, module.Name)
   250  		}
   251  
   252  		args := []string{"apply", "--whitespace=fix"}
   253  		applyCmd := exec.Command("git", args...)
   254  		applyCmd.Stdout, applyCmd.Stderr, applyCmd.Dir = os.Stdout, os.Stderr, dir
   255  		applyCmd.Stdin = bytes.NewReader([]byte(patchPart.PatchSet.Patch))
   256  		err := applyCmd.Run()
   257  		if err != nil {
   258  			return err
   259  		}
   260  	}
   261  	return nil
   262  }
   263  
   264  func fetchArtifacts(rc *APIClient, taskId string, rootDir string, shallow bool) error {
   265  	task, err := rc.GetTask(taskId)
   266  	if err != nil {
   267  		return errors.Wrapf(err, "problem getting task for %s", taskId)
   268  	}
   269  	if task == nil {
   270  		return errors.New("task not found")
   271  	}
   272  
   273  	urls, err := getUrlsChannel(rc, task, shallow)
   274  	if err != nil {
   275  		return errors.WithStack(err)
   276  	}
   277  
   278  	return errors.Wrapf(downloadUrls(rootDir, urls, 4),
   279  		"problem downloading artifacts for task %s", taskId)
   280  }
   281  
   282  // searchDependencies does a depth-first search of the dependencies of the "seed" task, returning
   283  // a list of all tasks related to it in the dependency graph. It performs this by doing successive
   284  // calls to the API to crawl the graph, keeping track of any already-processed tasks in the "found"
   285  // map.
   286  func searchDependencies(rc *APIClient, seed *service.RestTask, found map[string]bool) ([]*service.RestTask, error) {
   287  	out := []*service.RestTask{}
   288  	for _, dep := range seed.DependsOn {
   289  		if _, ok := found[dep.TaskId]; ok {
   290  			continue
   291  		}
   292  		t, err := rc.GetTask(dep.TaskId)
   293  		if err != nil {
   294  			return nil, err
   295  		}
   296  		if t != nil {
   297  			found[t.Id] = true
   298  			out = append(out, t)
   299  			more, err := searchDependencies(rc, t, found)
   300  			if err != nil {
   301  				return nil, err
   302  			}
   303  			out = append(out, more...)
   304  			for _, d := range more {
   305  				found[d.Id] = true
   306  			}
   307  		}
   308  	}
   309  	return out, nil
   310  }
   311  
   312  type artifactDownload struct {
   313  	url  string
   314  	path string
   315  }
   316  
   317  func getArtifactFolderName(task *service.RestTask) string {
   318  	if task.Requester == evergreen.PatchVersionRequester {
   319  		return fmt.Sprintf("artifacts-patch-%v_%v_%v", task.PatchNumber, task.BuildVariant, task.DisplayName)
   320  	}
   321  
   322  	if len(task.Revision) >= 5 {
   323  		return fmt.Sprintf("artifacts-%v-%v_%v", task.Revision[0:6], task.BuildVariant, task.DisplayName)
   324  	}
   325  	return fmt.Sprintf("artifacts-%v_%v", task.BuildVariant, task.DisplayName)
   326  }
   327  
   328  // getUrlsChannel takes a seed task, and returns a channel that streams all of the artifacts
   329  // associated with the task and its dependencies. If "shallow" is set, only artifacts from the seed
   330  // task will be streamed.
   331  func getUrlsChannel(rc *APIClient, seed *service.RestTask, shallow bool) (chan artifactDownload, error) {
   332  	allTasks := []*service.RestTask{seed}
   333  	if !shallow {
   334  		fmt.Printf("Gathering dependencies... ")
   335  		deps, err := searchDependencies(rc, seed, map[string]bool{})
   336  		if err != nil {
   337  			return nil, err
   338  		}
   339  		allTasks = append(allTasks, deps...)
   340  	}
   341  	fmt.Printf("Done.\n")
   342  
   343  	urls := make(chan artifactDownload)
   344  	go func() {
   345  		for _, t := range allTasks {
   346  			for _, f := range t.Files {
   347  				directoryName := getArtifactFolderName(t)
   348  				urls <- artifactDownload{f.URL, directoryName}
   349  			}
   350  		}
   351  		close(urls)
   352  	}()
   353  	return urls, nil
   354  }
   355  
   356  func fileNameWithIndex(filename string, index int) string {
   357  	if index-1 == 0 {
   358  		return filename
   359  	}
   360  	parts := strings.Split(filename, ".")
   361  	// If the file has no extension, just append the number with _
   362  	if len(parts) == 1 {
   363  		return fmt.Sprintf("%s_(%d)", filename, index-1)
   364  	}
   365  	// If the file has an extension, add _N (index) just before the extension.
   366  	return fmt.Sprintf("%s_(%d).%s", parts[0], index-1, strings.Join(parts[1:], "."))
   367  }
   368  
   369  // downloadUrls pulls a set of artifacts from the given channel and downloads them, using up to
   370  // the given number of workers in parallel. The given root directory determines the base location
   371  // where all the artifact files will be downloaded to.
   372  func downloadUrls(root string, urls chan artifactDownload, workers int) error {
   373  	if workers <= 0 {
   374  		panic("invalid workers count")
   375  	}
   376  	wg := sync.WaitGroup{}
   377  	errs := make(chan error)
   378  	wg.Add(workers)
   379  
   380  	// Keep track of filenames being downloaded, so that if there are collisions, we can detect
   381  	// and re-name the file to something else.
   382  	fileNamesUsed := struct {
   383  		nameCounts map[string]int
   384  		sync.Mutex
   385  	}{nameCounts: map[string]int{}}
   386  
   387  	for i := 0; i < workers; i++ {
   388  		go func(workerId int) {
   389  			defer wg.Done()
   390  			counter := 0
   391  			for u := range urls {
   392  
   393  				// Try to determinate the file location for the output.
   394  				folder := filepath.Join(root, u.path)
   395  				// As a backup plan in case we can't figure out the file name from the URL,
   396  				// the file name will just be named after the worker ID and file index.
   397  				justFile := fmt.Sprintf("%v_%v", workerId, counter)
   398  				parsedUrl, err := url.Parse(u.url)
   399  				if err == nil {
   400  					// under normal operation, the file name written to disk will match the name
   401  					// of the file in the URL. For instance, http://www.website.com/file.tgz
   402  					// will assume "file.tgz".
   403  					pathParts := strings.Split(parsedUrl.Path, "/")
   404  					if len(pathParts) >= 1 {
   405  						justFile = util.CleanForPath(pathParts[len(pathParts)-1])
   406  					}
   407  				}
   408  
   409  				fileName := filepath.Join(folder, justFile)
   410  				fileNamesUsed.Lock()
   411  				for {
   412  					fileNamesUsed.nameCounts[fileName] += 1
   413  					testFileName := fileNameWithIndex(fileName, fileNamesUsed.nameCounts[fileName])
   414  					_, err = os.Stat(testFileName)
   415  					if err != nil {
   416  						if os.IsNotExist(err) {
   417  							// we found a file name to safely create without collisions..
   418  							fileName = testFileName
   419  							break
   420  						}
   421  						// something else went wrong.
   422  						errs <- errors.Errorf("failed to check if file exists: %v", err)
   423  						return
   424  					}
   425  				}
   426  
   427  				fileNamesUsed.Unlock()
   428  
   429  				err = os.MkdirAll(folder, 0777)
   430  				if err != nil {
   431  					errs <- errors.Errorf("Couldn't create output directory %v: %v", folder, err)
   432  					continue
   433  				}
   434  
   435  				out, err := os.Create(fileName)
   436  				if err != nil {
   437  					errs <- errors.Errorf("Couldn't download %v: %v", u.url, err)
   438  					continue
   439  				}
   440  				defer out.Close()
   441  				resp, err := http.Get(u.url)
   442  				if err != nil {
   443  					errs <- errors.Errorf("Couldn't download %v: %v", u.url, err)
   444  					continue
   445  				}
   446  				defer resp.Body.Close()
   447  
   448  				// If we can get the info, determine the file size so that the human can get an
   449  				// idea of how long the file might take to download.
   450  				// TODO: progress bars.
   451  				length, _ := strconv.Atoi(resp.Header.Get("Content-Length"))
   452  				sizeLog := ""
   453  				if length > 0 {
   454  					sizeLog = fmt.Sprintf(" (%s)", humanize.Bytes(uint64(length)))
   455  				}
   456  
   457  				justFile = filepath.Base(fileName)
   458  				fmt.Printf("(worker %v) Downloading %v to directory %s%s\n", workerId, justFile, u.path, sizeLog)
   459  				//sizeTracker := util.SizeTrackingReader{0, resp.Body}
   460  				_, err = io.Copy(out, resp.Body)
   461  				if err != nil {
   462  					errs <- errors.Errorf("Couldn't download %v: %v", u.url, err)
   463  					continue
   464  				}
   465  				counter++
   466  			}
   467  		}(i)
   468  	}
   469  
   470  	done := make(chan struct{})
   471  	var hasErrors error
   472  	go func() {
   473  		defer close(done)
   474  		for e := range errs {
   475  			hasErrors = errors.New("some files could not be downloaded successfully")
   476  			fmt.Println("error: ", e)
   477  		}
   478  	}()
   479  	wg.Wait()
   480  	close(errs)
   481  	<-done
   482  
   483  	return hasErrors
   484  }