github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/experiments/internal/pkg/experiment/experiment.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package experiment is responsible for running experiments on GCE VMs.
    16  package experiment
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"os"
    22  	"path/filepath"
    23  	"strings"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/bazelbuild/reclient/experiments/internal/pkg/gcs"
    28  	"github.com/bazelbuild/reclient/experiments/internal/pkg/vm"
    29  
    30  	epb "github.com/bazelbuild/reclient/experiments/api/experiment"
    31  
    32  	"google.golang.org/protobuf/proto"
    33  
    34  	log "github.com/golang/glog"
    35  	"golang.org/x/sync/errgroup"
    36  
    37  	"github.com/bazelbuild/rules_go/go/tools/bazel"
    38  )
    39  
    40  const (
    41  	logFile     = "/tmp/exp.log"
    42  	timeFile    = "/tmp/time.txt"
    43  	trialFile   = "/tmp/trial.txt"
    44  	outDir      = "outputs"
    45  	reclientDir = "reclient"
    46  )
    47  
    48  var (
    49  	reclientBinaries = []string{
    50  		"cmd/bootstrap",
    51  		"cmd/dumpstats",
    52  		"cmd/reproxy",
    53  		"cmd/rewrapper",
    54  	}
    55  )
    56  
    57  // Experiment encapsulates information about an experiment and logic to run it.
    58  type Experiment struct {
    59  	date         string
    60  	resBucket    string
    61  	gcpProject   string
    62  	expPb        *epb.Experiment
    63  	baseDir      string
    64  	vms          map[string]*vm.VM
    65  	tmpDir       map[string]string
    66  	runConfigs   map[string]*epb.RunConfiguration
    67  	setupScripts map[string]string
    68  	buildScripts map[string]string
    69  	logFrequency int
    70  }
    71  
    72  // NewExperiment creates a new experiment using the experiment proto.
    73  func NewExperiment(expPb *epb.Experiment, baseDir string, gcpProject, date, resBucket string, logFrequency int) (*Experiment, error) {
    74  	if err := verify(expPb); err != nil {
    75  		return nil, fmt.Errorf("Experiment proto invalid: %v", err)
    76  	}
    77  	return &Experiment{
    78  		expPb:        expPb,
    79  		baseDir:      baseDir,
    80  		gcpProject:   gcpProject,
    81  		date:         date,
    82  		resBucket:    resBucket,
    83  		setupScripts: map[string]string{},
    84  		buildScripts: map[string]string{},
    85  		logFrequency: logFrequency,
    86  	}, nil
    87  }
    88  
    89  func verify(expPb *epb.Experiment) error {
    90  	// TODO: Check RC Names are unique
    91  	// TODO: Check that you are using the reclient setup command
    92  	return nil
    93  }
    94  
    95  func (e *Experiment) tryDownloadLogFiles(ctx context.Context) error {
    96  	return e.runOnConfigs(func(rc *epb.RunConfiguration) error {
    97  		v := e.vms[rc.GetName()]
    98  		log.Infof("Downloading exp.log from %v", v.Name())
    99  		v.CopyFilesFromVM(ctx, logFile, filepath.Join(e.tmpDir[rc.GetName()], "exp.log"))
   100  		return nil
   101  	})
   102  }
   103  
   104  // Run runs the experiment.
   105  func (e *Experiment) Run(ctx context.Context) (err error) {
   106  	defer func() {
   107  		if err == nil {
   108  			e.cleanup(ctx)
   109  		} else {
   110  			log.Errorf("Experiment failed! Resources were not cleaned for investigation. Please cleanup resources manually.")
   111  			log.Errorf(e.debugMessage(ctx))
   112  			e.tryDownloadLogFiles(ctx)
   113  		}
   114  	}()
   115  
   116  	if err := e.prepRCs(ctx); err != nil {
   117  		return err
   118  	}
   119  	if err := e.startVMs(ctx); err != nil {
   120  		return err
   121  	}
   122  	if err := e.copyInputs(ctx); err != nil {
   123  		return err
   124  	}
   125  	if err := e.downloadReclient(ctx); err != nil {
   126  		return err
   127  	}
   128  	if err := e.runSetup(ctx); err != nil {
   129  		return err
   130  	}
   131  	for i := 0; i < int(e.expPb.NumTrials); i++ {
   132  		log.Infof("Starting Trial %v", i)
   133  		if err := e.runBuilds(ctx); err != nil {
   134  			return err
   135  		}
   136  		if err := e.downloadOutputs(ctx, i); err != nil {
   137  			return err
   138  		}
   139  		if err := e.tearDown(ctx); err != nil {
   140  			return err
   141  		}
   142  		if err := e.cleanOutDirs(ctx); err != nil {
   143  			return err
   144  		}
   145  	}
   146  	return nil
   147  }
   148  
   149  func (e *Experiment) cleanup(ctx context.Context) {
   150  	g := new(errgroup.Group)
   151  	for _, v := range e.vms {
   152  		v := v
   153  		g.Go(func() error {
   154  			_, err := v.Delete(ctx, false)
   155  			return err
   156  		})
   157  	}
   158  	g.Wait()
   159  
   160  	for _, d := range e.tmpDir {
   161  		os.RemoveAll(d)
   162  	}
   163  }
   164  
   165  func (e *Experiment) cleanOutDirs(ctx context.Context) error {
   166  	for _, d := range e.tmpDir {
   167  		od := filepath.Join(d, outDir)
   168  		if err := os.RemoveAll(od); err != nil {
   169  			return err
   170  		}
   171  		// Recreate the directory for future outputs.
   172  		if err := os.Mkdir(od, 0777); err != nil {
   173  			return err
   174  		}
   175  	}
   176  	return nil
   177  }
   178  
   179  func (e *Experiment) prepRCs(ctx context.Context) error {
   180  	e.tmpDir = make(map[string]string)
   181  	e.runConfigs = make(map[string]*epb.RunConfiguration)
   182  	for _, rc := range e.expPb.GetRunConfigurations() {
   183  		if rc.NumMachines == 0 {
   184  			rc.NumMachines = 1
   185  		}
   186  
   187  		for i := uint32(0); i < rc.GetNumMachines(); i++ {
   188  			dest, err := os.MkdirTemp("", e.expPb.GetName())
   189  			if err != nil {
   190  				return err
   191  			}
   192  
   193  			rcCopy := proto.Clone(rc).(*epb.RunConfiguration)
   194  			rcCopy.Name += fmt.Sprintf("-%d", i)
   195  
   196  			rc := applyConfig(e.expPb.GetBaseConfiguration(), rcCopy)
   197  			e.runConfigs[rc.GetName()] = rc
   198  			e.tmpDir[rc.GetName()] = dest
   199  			if err := os.Mkdir(filepath.Join(e.tmpDir[rc.GetName()], outDir), 0777); err != nil {
   200  				return err
   201  			}
   202  			if err := os.Mkdir(filepath.Join(e.tmpDir[rc.GetName()], reclientDir), 0777); err != nil {
   203  				return err
   204  			}
   205  		}
   206  	}
   207  	return nil
   208  }
   209  
   210  func (e *Experiment) debugMessage(ctx context.Context) string {
   211  	debugMsg := ""
   212  	delCmds := ""
   213  
   214  	for name, vm := range e.vms {
   215  		tmp, ok := e.tmpDir[name]
   216  		if !ok {
   217  			tmp = "MISSING_TMP_DIR_NAME"
   218  		}
   219  
   220  		debugMsg += fmt.Sprintf("Experiment %s ran at VM %s. Local temp folder at %s.\n", name, vm.Name(), tmp)
   221  		vmDelCommand, _ := vm.Delete(ctx, true)
   222  		delCmds += fmt.Sprintf("$ %s\n", vmDelCommand)
   223  	}
   224  	debugMsg += fmt.Sprintf("There may be partial results at gs://%v/%v_%v/\n", e.resBucket, e.expPb.GetName(), e.date)
   225  	debugMsg += fmt.Sprintf("Delete the VMs running:\n")
   226  	debugMsg += delCmds
   227  
   228  	return debugMsg
   229  }
   230  
   231  func (e *Experiment) runOnConfigs(f func(rc *epb.RunConfiguration) error) error {
   232  	g := new(errgroup.Group)
   233  	for _, rc := range e.runConfigs {
   234  		rc := rc
   235  		g.Go(func() error {
   236  			return f(rc)
   237  		})
   238  	}
   239  	return g.Wait()
   240  }
   241  
   242  func (e *Experiment) startVMs(ctx context.Context) error {
   243  	e.vms = make(map[string]*vm.VM)
   244  	var lock sync.Mutex
   245  	return e.runOnConfigs(func(rc *epb.RunConfiguration) error {
   246  		v := vm.NewMachine(e.rcID(rc), e.gcpProject, rc.GetMachineSettings())
   247  		if v == nil {
   248  			log.Fatal("Could not create machine based on configuration settings")
   249  		}
   250  		lock.Lock()
   251  		e.vms[rc.GetName()] = v
   252  		lock.Unlock()
   253  		if err := v.CreateWithDisk(ctx); err != nil {
   254  			return err
   255  		}
   256  		if err := v.Mount(ctx); err != nil {
   257  			return err
   258  		}
   259  		return nil
   260  	})
   261  }
   262  
   263  func (e *Experiment) copyInputs(ctx context.Context) error {
   264  	return e.runOnConfigs(func(rc *epb.RunConfiguration) error {
   265  		v, ok := e.vms[rc.GetName()]
   266  		if !ok {
   267  			return fmt.Errorf("could not find VM for run configuration %v", rc.GetName())
   268  		}
   269  
   270  		for _, input := range rc.GetInputs() {
   271  			src := filepath.Join(e.baseDir, input.Source)
   272  
   273  			if input.GetVmDirect() {
   274  				if err := v.CopyFilesToVM(ctx, src, input.Destination); err != nil {
   275  					return err
   276  				}
   277  			} else {
   278  				base := filepath.Base(input.Destination)
   279  				gcsDest := fmt.Sprintf("gs://%v/%v_%v/%v/inputs/%v", e.resBucket, e.expPb.GetName(), e.date, rc.GetName(), base)
   280  				gcs.Copy(ctx, src, gcsDest)
   281  
   282  				var ok bool
   283  				var err error
   284  				// gsutil sometimes is reported as non-existent - no idea why that happens.
   285  				// Heuristically, retrying fixes this problem.
   286  				for i := 0; i < 5; i++ {
   287  					var oe string
   288  					if oe, err = v.RunCommand(ctx, &epb.Command{
   289  						Args: []string{"gsutil", "cp", gcsDest, input.Destination},
   290  					}); err == nil {
   291  						ok = true
   292  						break
   293  					}
   294  					log.Errorf("Failed to download inputs in remote machine %v: %v <Output: %v> --- Retrying...", rc.Name, err, oe)
   295  				}
   296  				if !ok {
   297  					return fmt.Errorf("Error downloading input on remote machine: %v", err)
   298  				}
   299  
   300  				if oe, err := v.RunCommand(ctx, &epb.Command{
   301  					Args: []string{"chmod", "+755", input.Destination},
   302  				}); err != nil {
   303  					return fmt.Errorf("Error setting permissions on input: %v <Output: %v>", err, oe)
   304  				}
   305  			}
   306  		}
   307  
   308  		return nil
   309  	})
   310  }
   311  
   312  func (e *Experiment) runSetup(ctx context.Context) error {
   313  	log.Infof("Will run setup scripts")
   314  	for _, rc := range e.runConfigs {
   315  		s, ok := e.setupScripts[rc.GetName()]
   316  		if !ok {
   317  			var err error
   318  			s, err = e.setupScript(rc)
   319  			if err != nil {
   320  				return err
   321  			}
   322  			e.setupScripts[rc.GetName()] = s
   323  		}
   324  	}
   325  	defer log.Infof("Finished setup scripts")
   326  	return e.runScripts(ctx, e.setupScripts)
   327  }
   328  
   329  func (e *Experiment) runBuilds(ctx context.Context) error {
   330  	log.Infof("Will run build scripts")
   331  	for _, rc := range e.runConfigs {
   332  		s, ok := e.buildScripts[rc.GetName()]
   333  		if !ok {
   334  			var err error
   335  			s, err = e.buildScript(rc)
   336  			if err != nil {
   337  				return err
   338  			}
   339  			e.buildScripts[rc.GetName()] = s
   340  		}
   341  	}
   342  	defer log.Infof("Finished build scripts")
   343  	return e.runScripts(ctx, e.buildScripts)
   344  }
   345  
   346  func (e *Experiment) runScripts(ctx context.Context, scripts map[string]string) error {
   347  	return e.runOnConfigs(func(rc *epb.RunConfiguration) error {
   348  		script := scripts[rc.GetName()]
   349  		base := filepath.Base(script)
   350  		v, ok := e.vms[rc.GetName()]
   351  		if !ok {
   352  			return fmt.Errorf("could not find VM for run configuration %v", rc.GetName())
   353  		}
   354  		if err := v.CopyFilesToVM(ctx, script, base); err != nil {
   355  			return err
   356  		}
   357  		if _, err := v.RunCommand(ctx, &epb.Command{
   358  			Args: []string{
   359  				"chmod +x " + base,
   360  			}}); err != nil {
   361  			return err
   362  		}
   363  		cCtx, cancel := context.WithCancel(ctx)
   364  		defer cancel()
   365  
   366  		if e.logFrequency < 0 {
   367  			if v.IsVM() {
   368  				e.logFrequency = 5
   369  			} else {
   370  				e.logFrequency = 0
   371  			}
   372  		}
   373  		if e.logFrequency > 0 {
   374  			go func() {
   375  				readOE := ""
   376  				ticker := time.NewTicker(time.Duration(e.logFrequency) * time.Second)
   377  				for {
   378  					select {
   379  					case <-ticker.C:
   380  						if oe, err := v.RunCommand(cCtx, &epb.Command{Args: []string{"cat", logFile}}); err == nil {
   381  							delta := strings.TrimPrefix(oe, readOE)
   382  							for _, l := range strings.Split(delta, "\n") {
   383  								if l == "" {
   384  									continue
   385  								}
   386  								log.V(2).Infof("%v: %v", rc.GetName(), l)
   387  							}
   388  							readOE = oe
   389  						}
   390  					case <-cCtx.Done():
   391  						break
   392  					}
   393  				}
   394  			}()
   395  		}
   396  		_, err := v.RunCommand(ctx, &epb.Command{
   397  			Args: []string{
   398  				"./" + base,
   399  			},
   400  		})
   401  		return err
   402  	})
   403  }
   404  
   405  func (e *Experiment) downloadReclient(ctx context.Context) error {
   406  	return e.runOnConfigs(func(rc *epb.RunConfiguration) error {
   407  		path := rc.GetReclientBinPath()
   408  		if path == "" {
   409  			return nil
   410  		}
   411  		dest := filepath.Join(e.tmpDir[rc.GetName()], reclientDir)
   412  		if err := e.copyReclientBinaries(ctx, path, dest); err != nil {
   413  			return err
   414  		}
   415  		v, ok := e.vms[rc.GetName()]
   416  		if !ok {
   417  			return fmt.Errorf("could not find VM for run configuration %v", rc.GetName())
   418  		}
   419  		if _, err := v.RunCommand(ctx, &epb.Command{
   420  			Args: []string{
   421  				fmt.Sprintf(v.Sudo("rm -rf %v")+" && "+v.Sudo("mkdir %v")+" && "+v.Sudo("chmod -R 777 %v"),
   422  					rc.ReclientDestination, rc.ReclientDestination, rc.ReclientDestination),
   423  			}}); err != nil {
   424  			return err
   425  		}
   426  		if err := v.CopyFilesToVM(ctx, dest+"/*", rc.ReclientDestination); err != nil {
   427  			return err
   428  		}
   429  		// Set 777 permissions after files are created
   430  		if _, err := v.RunCommand(ctx, &epb.Command{
   431  			Args: []string{
   432  				v.Sudo("chmod -R 777 " + rc.ReclientDestination),
   433  			}}); err != nil {
   434  			return err
   435  		}
   436  		return nil
   437  	})
   438  }
   439  
   440  func (e *Experiment) copyReclientBinaries(ctx context.Context, path, dest string) error {
   441  	switch path {
   442  	case "local":
   443  		reclientBinaryPaths := []string{}
   444  		for _, bl := range reclientBinaries {
   445  			binPath, ok := bazel.FindBinary(bl, strings.Split(bl, "/")[1])
   446  			if !ok {
   447  				return fmt.Errorf("Reclient binary %v not found", bl)
   448  			}
   449  			reclientBinaryPaths = append(reclientBinaryPaths, binPath)
   450  		}
   451  		for _, p := range reclientBinaryPaths {
   452  			if err := gcs.Copy(ctx, p, dest); err != nil {
   453  				return err
   454  			}
   455  		}
   456  	default:
   457  		if !strings.HasPrefix(path, "gs://") {
   458  			path = fmt.Sprintf("gs://%v", path)
   459  		}
   460  		if !strings.HasSuffix(path, "/*") {
   461  			path = fmt.Sprintf("%v/*", path)
   462  		}
   463  		if err := gcs.Copy(ctx, path, dest); err != nil {
   464  			return err
   465  		}
   466  	}
   467  	return nil
   468  }
   469  
   470  func exportEnv(key, value string) string {
   471  	return fmt.Sprintf("export %v=%v\n", key, value) +
   472  		fmt.Sprintf("echo %v=${%v} >> %v\n", key, key, logFile)
   473  }
   474  
   475  func (e *Experiment) exportEnvironmentVariables(rc *epb.RunConfiguration, sb *strings.Builder) {
   476  	sb.WriteString(exportEnv("EXPNAME", e.expPb.GetName()))
   477  	sb.WriteString(exportEnv("RCNAME", rc.GetName()))
   478  	sb.WriteString(exportEnv("RECLIENT_BIN_PATH", rc.GetReclientBinPath()))
   479  	sb.WriteString(exportEnv("RECLIENT_DESTINATION", rc.GetReclientDestination()))
   480  	for _, environment := range rc.GetEnvironment() {
   481  		sb.WriteString(exportEnv(environment.GetKey(), environment.GetValue()))
   482  	}
   483  }
   484  
   485  func (e *Experiment) setupScript(rc *epb.RunConfiguration) (string, error) {
   486  	var sb strings.Builder
   487  	sb.WriteString("#!/bin/bash\n")
   488  	sb.WriteString("set -e\n")
   489  	sb.WriteString(fmt.Sprintf("touch %v\n", logFile))
   490  	sb.WriteString(fmt.Sprintf("chmod 666 %v\n", logFile))
   491  	sb.WriteString(fmt.Sprintf("touch %v\n", timeFile))
   492  	sb.WriteString(fmt.Sprintf("chmod 666 %v\n", timeFile))
   493  	sb.WriteString(fmt.Sprintf("echo 0 > %v\n", trialFile))
   494  	sb.WriteString(fmt.Sprintf("chmod 666 %v\n", trialFile))
   495  
   496  	e.exportEnvironmentVariables(rc, &sb)
   497  
   498  	hasSu := false
   499  	for _, cmd := range rc.GetSetupCommands() {
   500  		for _, a := range cmd.Args {
   501  			if a == "su" {
   502  				hasSu = true
   503  				cmd.Args = append(cmd.Args, "<<'EOSU'")
   504  				break
   505  			}
   506  		}
   507  		sb.WriteString(fmt.Sprintf("echo \"`date`: Will run: %v\" >> %v\n", strings.Join(cmd.Args, " "), logFile))
   508  		sb.WriteString(strings.Join(cmd.Args, " ") + "\n")
   509  		sb.WriteString(fmt.Sprintf("echo \"`date`: Finished: %v\" >> %v\n", strings.Join(cmd.Args, " "), logFile))
   510  	}
   511  	if hasSu {
   512  		sb.WriteString("EOSU\n")
   513  	}
   514  	tmpfile, err := os.CreateTemp(e.tmpDir[rc.GetName()], "setup-*.sh")
   515  	if err != nil {
   516  		return "", err
   517  	}
   518  	if _, err := tmpfile.Write([]byte(sb.String())); err != nil {
   519  		return "", err
   520  	}
   521  	if err := tmpfile.Close(); err != nil {
   522  		return "", err
   523  	}
   524  	log.Infof("Wrote script for %v to %v", rc.GetName(), tmpfile.Name())
   525  	return tmpfile.Name(), nil
   526  }
   527  
   528  func (e *Experiment) buildScript(rc *epb.RunConfiguration) (string, error) {
   529  	var sb strings.Builder
   530  	sb.WriteString("#!/bin/bash\n")
   531  	sb.WriteString("set -e\n")
   532  	sb.WriteString(fmt.Sprintf("touch %v\n", logFile))
   533  	sb.WriteString(fmt.Sprintf("chmod 666 %v\n", logFile))
   534  	sb.WriteString(fmt.Sprintf("touch %v\n", timeFile))
   535  	sb.WriteString(fmt.Sprintf("chmod 666 %v\n", timeFile))
   536  
   537  	e.exportEnvironmentVariables(rc, &sb)
   538  
   539  	sb.WriteString(fmt.Sprintf("trial=$(cat %v)\n", trialFile))
   540  	sb.WriteString(exportEnv("TRIAL", "${trial}"))
   541  	sb.WriteString(fmt.Sprint("trial=$((trial + 1))\n"))
   542  	sb.WriteString(fmt.Sprintf("echo ${trial} > %v\n", trialFile))
   543  
   544  	hasSu := false
   545  	for _, cmd := range rc.GetPreBuildCommands() {
   546  		toAppend := ""
   547  		for _, a := range cmd.Args {
   548  			if a == "su" {
   549  				hasSu = true
   550  				toAppend = "<<'EOSU'"
   551  				break
   552  			}
   553  		}
   554  		if toAppend == "" {
   555  			toAppend = " >> " + logFile + " 2>&1 "
   556  		}
   557  		sb.WriteString(fmt.Sprintf("echo \"`date`: Will run: %v\" >> %v\n", strings.Join(cmd.Args, " "), logFile))
   558  		sb.WriteString(strings.Join(cmd.Args, " ") + toAppend + "\n")
   559  		sb.WriteString(fmt.Sprintf("echo \"`date`: Finished: %v\" >> %v\n", strings.Join(cmd.Args, " "), logFile))
   560  	}
   561  	sb.WriteString("START=`date +%s`\n")
   562  	sb.WriteString(fmt.Sprintf("echo \"`date`: Will run: %v\" >> %v\n", strings.Join(rc.BuildCommand.Args, " "), logFile))
   563  	sb.WriteString(strings.Join(rc.BuildCommand.Args, " ") + " >> " + logFile + " 2>&1 " + "\n")
   564  	sb.WriteString(fmt.Sprintf("echo \"`date`: Finished: %v\" >> %v\n", strings.Join(rc.BuildCommand.Args, " "), logFile))
   565  	sb.WriteString("END=`date +%s`\n")
   566  	sb.WriteString(fmt.Sprintf("echo \"$((($END-$START)))s\" > %v\n", timeFile))
   567  
   568  	for _, cmd := range rc.GetPostBuildCommands() {
   569  		toAppend := ""
   570  		for _, a := range cmd.Args {
   571  			if a == "su" {
   572  				hasSu = true
   573  				toAppend = "<<'EOSU'"
   574  				break
   575  			}
   576  		}
   577  		if toAppend == "" {
   578  			toAppend = " >> " + logFile + " 2>&1 "
   579  		}
   580  		sb.WriteString(fmt.Sprintf("echo \"`date`: Will run: %v\" >> %v\n", strings.Join(cmd.Args, " "), logFile))
   581  		sb.WriteString(strings.Join(cmd.Args, " ") + toAppend + "\n")
   582  		sb.WriteString(fmt.Sprintf("echo \"`date`: Finished: %v\" >> %v\n", strings.Join(cmd.Args, " "), logFile))
   583  	}
   584  
   585  	if hasSu {
   586  		sb.WriteString("EOSU\n")
   587  	}
   588  	tmpfile, err := os.CreateTemp(e.tmpDir[rc.GetName()], "build-*.sh")
   589  	if err != nil {
   590  		return "", err
   591  	}
   592  	if _, err := tmpfile.Write([]byte(sb.String())); err != nil {
   593  		return "", err
   594  	}
   595  	if err := tmpfile.Close(); err != nil {
   596  		return "", err
   597  	}
   598  	log.Infof("Wrote script for %v to %v", rc.GetName(), tmpfile.Name())
   599  	return tmpfile.Name(), nil
   600  }
   601  
   602  func (e *Experiment) downloadOutputs(ctx context.Context, trial int) error {
   603  	return e.runOnConfigs(func(rc *epb.RunConfiguration) error {
   604  		v, ok := e.vms[rc.GetName()]
   605  		if !ok {
   606  			return fmt.Errorf("could not find VM for run configuration %v", rc.GetName())
   607  		}
   608  		for _, o := range rc.GetOutputs() {
   609  			if err := v.CopyFilesFromVM(ctx, o, filepath.Join(e.tmpDir[rc.GetName()], outDir)+"/"); err != nil {
   610  				log.Warningf("Couldn't copy file %v: %v", o, err)
   611  			}
   612  		}
   613  		if err := v.CopyFilesFromVM(ctx, timeFile, filepath.Join(e.tmpDir[rc.GetName()], outDir)+"/"); err != nil {
   614  			return err
   615  		}
   616  		if err := v.CopyFilesFromVM(ctx, logFile, filepath.Join(e.tmpDir[rc.GetName()], outDir)+"/"); err != nil {
   617  			return err
   618  		}
   619  		dest := fmt.Sprintf("gs://%v/%v_%v/%v/%v", e.resBucket, e.expPb.GetName(), e.date, rc.GetName(), trial)
   620  		if err := gcs.Copy(ctx, filepath.Join(e.tmpDir[rc.GetName()], outDir)+"/*", dest); err != nil {
   621  			return err
   622  		}
   623  		return nil
   624  	})
   625  }
   626  
   627  func (e *Experiment) tearDown(ctx context.Context) error {
   628  	log.Infof("Will run teardown")
   629  	defer log.Infof("Finished teardown")
   630  	return e.runOnConfigs(func(rc *epb.RunConfiguration) error {
   631  		v, ok := e.vms[rc.GetName()]
   632  		if !ok {
   633  			return fmt.Errorf("could not find VM for run configuration %v", rc.GetName())
   634  		}
   635  		if _, err := v.RunCommand(ctx, &epb.Command{
   636  			Args: []string{
   637  				v.Sudo("rm"), logFile, "&&", v.Sudo("rm"), timeFile,
   638  			}}); err != nil {
   639  			return err
   640  		}
   641  		for _, cmd := range rc.GetTeardownCommands() {
   642  			if _, err := v.RunCommand(ctx, cmd); err != nil {
   643  				return err
   644  			}
   645  		}
   646  		return nil
   647  	})
   648  }
   649  
   650  func (e *Experiment) rcID(rc *epb.RunConfiguration) string {
   651  	return fmt.Sprintf("%v-%v-%v", e.expPb.GetName(), e.date, rc.GetName())
   652  }
   653  
   654  func applyConfig(base, run *epb.RunConfiguration) *epb.RunConfiguration {
   655  	c := proto.Clone(base).(*epb.RunConfiguration)
   656  	c.Name = run.GetName()
   657  
   658  	vm.MergeSettings(c.GetMachineSettings(), run.GetMachineSettings())
   659  
   660  	c.ReclientBinPath = mergeVal(c.ReclientBinPath, run.GetReclientBinPath())
   661  	c.ReclientDestination = mergeVal(c.ReclientDestination, run.GetReclientDestination())
   662  	c.NumMachines = mergeValUint32(c.NumMachines, run.GetNumMachines())
   663  	c.Inputs = append(c.Inputs, run.GetInputs()...)
   664  	c.SetupCommands = append(c.SetupCommands, run.GetSetupCommands()...)
   665  	c.PreBuildCommands = append(c.PreBuildCommands, run.GetPreBuildCommands()...)
   666  	c.PostBuildCommands = append(c.PostBuildCommands, run.GetPostBuildCommands()...)
   667  	c.Environment = mergeEnvironments(c.GetEnvironment(), run.GetEnvironment())
   668  	if run.GetBuildCommand() != nil {
   669  		c.BuildCommand = run.GetBuildCommand()
   670  	}
   671  	c.Outputs = append(c.Outputs, run.GetOutputs()...)
   672  	c.TeardownCommands = append(c.TeardownCommands, run.GetTeardownCommands()...)
   673  	return c
   674  }
   675  
   676  func mergeVal(base, v string) string {
   677  	if v == "" {
   678  		return base
   679  	}
   680  	return v
   681  }
   682  
   683  func mergeValEnum(base, v int32) int32 {
   684  	if v == 0 {
   685  		return base
   686  	}
   687  	return v
   688  }
   689  
   690  func mergeValUint32(base, v uint32) uint32 {
   691  	if v == 0 {
   692  		return base
   693  	}
   694  	return v
   695  }
   696  
   697  func mergeEnvironments(base, run []*epb.Environment) []*epb.Environment {
   698  	if run == nil {
   699  		return base
   700  	} else if base == nil {
   701  		return run
   702  	}
   703  	var e []*epb.Environment
   704  	for _, baseEnv := range base {
   705  		e = append(e, proto.Clone(baseEnv).(*epb.Environment))
   706  	}
   707  	for _, runEnv := range run {
   708  		found := false
   709  		for _, baseEnv := range e {
   710  			if baseEnv.GetKey() == runEnv.GetKey() {
   711  				found = true
   712  				baseEnv.Value = runEnv.GetValue()
   713  			}
   714  
   715  		}
   716  		if !found {
   717  			e = append(e, runEnv)
   718  		}
   719  	}
   720  	return e
   721  }