gopkg.in/dedis/onet.v2@v2.0.0-20181115163211-c8f3724038a7/simul/platform/deterlab.go (about)

     1  // Deterlab is responsible for setting up everything to test the application
     2  // on deterlab.net
     3  // Given a list of hostnames, it will create an overlay
     4  // tree topology, using all but the last node. It will create multiple
     5  // nodes per server and run timestamping processes. The last node is
     6  // reserved for the logging server, which is forwarded to localhost:8081
     7  //
     8  // Creates the following directory structure:
     9  // build/ - where all cross-compiled executables are stored
    10  // remote/ - directory to be copied to the deterlab server
    11  //
    12  // The following apps are used:
    13  //   deter - runs on the user-machine in deterlab and launches the others
    14  //   forkexec - runs on the other servers and launches the app, so it can measure its cpu usage
    15  
    16  package platform
    17  
    18  import (
    19  	"bufio"
    20  	"errors"
    21  	"fmt"
    22  	"io/ioutil"
    23  	"os"
    24  	"os/exec"
    25  	"os/user"
    26  	"path"
    27  	"path/filepath"
    28  	"runtime"
    29  	"strconv"
    30  	"strings"
    31  	"sync"
    32  	"time"
    33  
    34  	"github.com/BurntSushi/toml"
    35  	"gopkg.in/dedis/onet.v2"
    36  	"gopkg.in/dedis/onet.v2/app"
    37  	"gopkg.in/dedis/onet.v2/log"
    38  )
    39  
// Deterlab holds all fields necessary for a Deterlab-run. The exported
// fields are written to and read back from "deter.toml"; the unexported
// ones only live for the duration of the process.
type Deterlab struct {
	// *** Deterlab-related configuration
	// The login on the platform - used for every ssh- and rsync-call
	Login string
	// The outside host on the platform - the machine that is contacted
	// over ssh
	Host string
	// The name of the project
	Project string
	// Name of the Experiment - also name of hosts
	Experiment string
	// Directory holding the simulation-main file - set to the current
	// working directory in Configure
	simulDir string
	// Directory where the deterlab-users-file is held
	usersDir string
	// Directory where everything is copied into - it is synchronised to
	// "remote/" on the Deterlab host by Deploy
	deployDir string
	// Directory for building
	buildDir string
	// Directory holding all go-files of onet/simul/platform
	platformDir string
	// DNS-resolvable names, built as
	// <node>.<Experiment>.<Project>.isi.deterlab.net
	Phys []string
	// IP addresses as reported by 'expinfo', in the same order as Phys
	Virt []string
	// Channel to communicate the stopping of the experiment: Start sends
	// "finished" on it once the remote 'users' command returns, Wait
	// receives it
	sshDeter chan string
	// Whether the simulation is started
	started bool

	// ProxyAddress : the proxy will redirect every traffic it
	// receives to this address
	ProxyAddress string
	// MonitorAddress is the address given to clients to connect to the monitor
	// It is actually the Proxy that will listen to that address and clients
	// won't know a thing about it
	MonitorAddress string
	// Port number of the monitor and the proxy
	MonitorPort int

	// Number of available servers
	Servers int
	// Name of the simulation
	Simulation string
	// Number of machines
	Hosts int
	// Debugging-level: 0 is none - 5 is everything
	Debug int
	// RunWait for long simulations - a duration string as understood by
	// time.ParseDuration; Wait falls back to 600 seconds if it is empty,
	// zero or invalid
	RunWait string
	// suite used for the simulation - handed to the remote 'users' binary
	// as "-suite="
	Suite string
	// PreScript defines a script that is run before the simulation
	PreScript string
	// Tags to use when compiling - space-separated, only applied to the
	// "simul" binary
	Tags string
}
    97  
// simulConfig is the configuration returned by the simulation's Setup-call
// in Deploy.
var simulConfig *onet.SimulationConfig
    99  
   100  // Configure initialises the directories and loads the saved config
   101  // for Deterlab
   102  func (d *Deterlab) Configure(pc *Config) {
   103  	// Directory setup - would also be possible in /tmp
   104  	pwd, _ := os.Getwd()
   105  	d.Suite = pc.Suite
   106  	d.simulDir = pwd
   107  	d.deployDir = pwd + "/deploy"
   108  	d.buildDir = pwd + "/build"
   109  	_, file, _, _ := runtime.Caller(0)
   110  	d.platformDir = path.Dir(file)
   111  	os.RemoveAll(d.deployDir)
   112  	os.Mkdir(d.deployDir, 0770)
   113  	os.Mkdir(d.buildDir, 0770)
   114  	d.MonitorPort = pc.MonitorPort
   115  	log.Lvl3("Dirs are:", pwd, d.deployDir)
   116  	d.loadAndCheckDeterlabVars()
   117  
   118  	d.Debug = pc.Debug
   119  	if d.Simulation == "" {
   120  		log.Fatal("No simulation defined in runconfig")
   121  	}
   122  
   123  	// Setting up channel
   124  	d.sshDeter = make(chan string)
   125  }
   126  
// pkg describes one binary to be cross-compiled by Build.
type pkg struct {
	// name of the resulting binary inside the build-directory
	name      string
	// target processor architecture, e.g. "amd64" or "386"
	processor string
	// target operating system, e.g. "linux" or "freebsd"
	system    string
	// directory holding the sources of the binary
	path      string
}
   133  
   134  // Build prepares all binaries for the Deterlab-simulation.
   135  // If 'build' is empty, all binaries are created, else only
   136  // the ones indicated. Either "simul" or "users"
   137  func (d *Deterlab) Build(build string, arg ...string) error {
   138  	log.Lvl1("Building for", d.Login, d.Host, d.Project, build, "simulDir=", d.simulDir)
   139  	start := time.Now()
   140  
   141  	var wg sync.WaitGroup
   142  
   143  	if err := os.RemoveAll(d.buildDir); err != nil {
   144  		return err
   145  	}
   146  	if err := os.Mkdir(d.buildDir, 0777); err != nil {
   147  		return err
   148  	}
   149  
   150  	// start building the necessary binaries - it's always the same,
   151  	// but built for another architecture.
   152  	packages := []pkg{
   153  		{"simul", "amd64", "linux", d.simulDir},
   154  		{"users", "386", "freebsd", path.Join(d.platformDir, "deterlab_users")},
   155  	}
   156  	if build == "" {
   157  		build = "simul,users"
   158  	}
   159  	var tags []string
   160  	if d.Tags != "" {
   161  		tags = append([]string{"-tags"}, strings.Split(d.Tags, " ")...)
   162  	}
   163  	log.Lvl3("Starting to build all executables", packages)
   164  	for _, p := range packages {
   165  		if !strings.Contains(build, p.name) {
   166  			log.Lvl2("Skipping build of", p.name)
   167  			continue
   168  		}
   169  		log.Lvl3("Building", p)
   170  		wg.Add(1)
   171  		go func(p pkg) {
   172  			defer wg.Done()
   173  			dst := path.Join(d.buildDir, p.name)
   174  			path, err := filepath.Rel(d.simulDir, p.path)
   175  			log.ErrFatal(err)
   176  			// deter has an amd64, linux architecture
   177  			var out string
   178  			if p.name == "simul" {
   179  				out, err = Build(path, dst,
   180  					p.processor, p.system, append(arg, tags...)...)
   181  			} else {
   182  				out, err = Build(path, dst,
   183  					p.processor, p.system, arg...)
   184  			}
   185  			if err != nil {
   186  				KillGo()
   187  				log.Lvl1(out)
   188  				log.Fatal(err)
   189  			}
   190  		}(p)
   191  	}
   192  	// wait for the build to finish
   193  	wg.Wait()
   194  	log.Lvl1("Build is finished after", time.Since(start))
   195  	return nil
   196  }
   197  
   198  // Cleanup kills all eventually remaining processes from the last Deploy-run
   199  func (d *Deterlab) Cleanup() error {
   200  	// Cleanup eventual ssh from the proxy-forwarding to the logserver
   201  	err := exec.Command("pkill", "-9", "-f", "ssh -nNTf").Run()
   202  	if err != nil {
   203  		log.Lvl3("Error stopping ssh:", err)
   204  	}
   205  
   206  	// SSH to the deterlab-server and end all running users-processes
   207  	log.Lvl3("Going to kill everything")
   208  	var sshKill chan string
   209  	sshKill = make(chan string)
   210  	go func() {
   211  		// Cleanup eventual residues of previous round - users and sshd
   212  		if _, err := SSHRun(d.Login, d.Host, "killall -9 users sshd"); err != nil {
   213  			log.Lvl3("Error while cleaning up:", err)
   214  		}
   215  
   216  		err := SSHRunStdout(d.Login, d.Host, "test -f remote/users && ( cd remote; ./users -kill )")
   217  		if err != nil {
   218  			log.Lvl1("NOT-Normal error from cleanup")
   219  			sshKill <- "error"
   220  		}
   221  		sshKill <- "stopped"
   222  	}()
   223  
   224  	for {
   225  		select {
   226  		case msg := <-sshKill:
   227  			if msg == "stopped" {
   228  				log.Lvl3("Users stopped")
   229  				return nil
   230  			}
   231  			log.Lvl2("Received other command", msg, "probably the app didn't quit correctly")
   232  		case <-time.After(time.Second * 20):
   233  			log.Lvl3("Timeout error when waiting for end of ssh")
   234  			return nil
   235  		}
   236  	}
   237  }
   238  
   239  // Deploy creates the appropriate configuration-files and copies everything to the
   240  // deterlab-installation.
   241  func (d *Deterlab) Deploy(rc *RunConfig) error {
   242  	if err := os.RemoveAll(d.deployDir); err != nil {
   243  		return err
   244  	}
   245  	if err := os.Mkdir(d.deployDir, 0777); err != nil {
   246  		return err
   247  	}
   248  
   249  	// Check for PreScript and copy it to the deploy-dir
   250  	d.PreScript = rc.Get("PreScript")
   251  	if d.PreScript != "" {
   252  		_, err := os.Stat(d.PreScript)
   253  		if !os.IsNotExist(err) {
   254  			if err := app.Copy(d.deployDir, d.PreScript); err != nil {
   255  				return err
   256  			}
   257  		}
   258  	}
   259  
   260  	// deploy will get rsync to /remote on the NFS
   261  
   262  	log.Lvl2("Localhost: Deploying and writing config-files")
   263  	sim, err := onet.NewSimulation(d.Simulation, string(rc.Toml()))
   264  	if err != nil {
   265  		return err
   266  	}
   267  	// Initialize the deter-struct with our current structure (for debug-levels
   268  	// and such), then read in the app-configuration to overwrite eventual
   269  	// 'Machines', 'ppm', '' or other fields
   270  	deter := *d
   271  	deterConfig := d.deployDir + "/deter.toml"
   272  	_, err = toml.Decode(string(rc.Toml()), &deter)
   273  	if err != nil {
   274  		return err
   275  	}
   276  	log.Lvl3("Creating hosts")
   277  	deter.createHosts()
   278  	log.Lvl3("Writing the config file :", deter)
   279  	onet.WriteTomlConfig(deter, deterConfig, d.deployDir)
   280  
   281  	simulConfig, err = sim.Setup(d.deployDir, deter.Virt)
   282  	if err != nil {
   283  		return err
   284  	}
   285  	simulConfig.Config = string(rc.Toml())
   286  	log.Lvl3("Saving configuration")
   287  	if err := simulConfig.Save(d.deployDir); err != nil {
   288  		log.Error("Couldn't save configuration:", err)
   289  	}
   290  
   291  	// Copy limit-files for more connections
   292  	ioutil.WriteFile(path.Join(d.deployDir, "simul.conf"),
   293  		[]byte(simulConnectionsConf), 0444)
   294  
   295  	// Copying build-files to deploy-directory
   296  	build, err := ioutil.ReadDir(d.buildDir)
   297  	for _, file := range build {
   298  		err = exec.Command("cp", d.buildDir+"/"+file.Name(), d.deployDir).Run()
   299  		if err != nil {
   300  			log.Fatal("error copying build-file:", d.buildDir, file.Name(), d.deployDir, err)
   301  		}
   302  	}
   303  
   304  	// Copy everything over to Deterlab
   305  	log.Lvl1("Copying over to", d.Login, "@", d.Host)
   306  	err = Rsync(d.Login, d.Host, d.deployDir+"/", "remote/")
   307  	if err != nil {
   308  		log.Fatal(err)
   309  	}
   310  	log.Lvl2("Done copying")
   311  
   312  	return nil
   313  }
   314  
   315  // Start creates a tunnel for the monitor-output and contacts the Deterlab-
   316  // server to run the simulation
   317  func (d *Deterlab) Start(args ...string) error {
   318  	// setup port forwarding for viewing log server
   319  	d.started = true
   320  	// Remote tunneling : the sink port is used both for the sink and for the
   321  	// proxy => the proxy redirects packets to the same port the sink is
   322  	// listening.
   323  	// -n = stdout == /Dev/null, -N => no command stream, -T => no tty
   324  	redirection := strconv.Itoa(d.MonitorPort) + ":" + d.ProxyAddress + ":" + strconv.Itoa(d.MonitorPort)
   325  	cmd := []string{"-nNTf", "-o", "StrictHostKeyChecking=no", "-o", "ExitOnForwardFailure=yes", "-R",
   326  		redirection, fmt.Sprintf("%s@%s", d.Login, d.Host)}
   327  	exCmd := exec.Command("ssh", cmd...)
   328  	if err := exCmd.Start(); err != nil {
   329  		log.Fatal("Failed to start the ssh port forwarding:", err)
   330  	}
   331  	if err := exCmd.Wait(); err != nil {
   332  		log.Fatal("ssh port forwarding exited in failure:", err)
   333  	}
   334  	log.Lvl3("Setup remote port forwarding", cmd)
   335  	go func() {
   336  		err := SSHRunStdout(d.Login, d.Host, "cd remote; GOMAXPROCS=8 ./users -suite="+d.Suite)
   337  		if err != nil {
   338  			log.Lvl3(err)
   339  		}
   340  		d.sshDeter <- "finished"
   341  	}()
   342  
   343  	return nil
   344  }
   345  
   346  // Wait for the process to finish
   347  func (d *Deterlab) Wait() error {
   348  	wait, err := time.ParseDuration(d.RunWait)
   349  	if err != nil || wait == 0 {
   350  		wait = 600 * time.Second
   351  		err = nil
   352  	}
   353  	if d.started {
   354  		log.Lvl3("Simulation is started")
   355  		select {
   356  		case msg := <-d.sshDeter:
   357  			if msg == "finished" {
   358  				log.Lvl3("Received finished-message, not killing users")
   359  				return nil
   360  			}
   361  			log.Lvl1("Received out-of-line message", msg)
   362  		case <-time.After(wait):
   363  			log.Lvl1("Quitting after waiting", wait)
   364  			d.started = false
   365  		}
   366  		d.started = false
   367  	}
   368  	return nil
   369  }
   370  
   371  // Write the hosts.txt file automatically
   372  // from project name and number of servers
   373  func (d *Deterlab) createHosts() {
   374  	// Query deterlab's API for servers
   375  	log.Lvl2("Querying Deterlab's API to retrieve server names and addresses")
   376  	command := fmt.Sprintf("/usr/testbed/bin/expinfo -l -e %s,%s", d.Project, d.Experiment)
   377  	apiReply, err := SSHRun(d.Login, d.Host, command)
   378  	if err != nil {
   379  		log.Fatal("Error while querying Deterlab:", err)
   380  	}
   381  	log.ErrFatal(d.parseHosts(string(apiReply)))
   382  }
   383  
   384  func (d *Deterlab) parseHosts(str string) error {
   385  	// Get the link-information, which is the second block in `expinfo`-output
   386  	infos := strings.Split(str, "\n\n")
   387  	if len(infos) < 2 {
   388  		return errors.New("didn't recognize output of 'expinfo'")
   389  	}
   390  	linkInfo := infos[1]
   391  	// Test for correct version in case the API-output changes
   392  	if !strings.HasPrefix(linkInfo, "Virtual Lan/Link Info:") {
   393  		return errors.New("didn't recognize output of 'expinfo'")
   394  	}
   395  	linkLines := strings.Split(linkInfo, "\n")
   396  	if len(linkLines) < 5 {
   397  		return errors.New("didn't recognice output of 'expinfo'")
   398  	}
   399  	nodes := linkLines[3:]
   400  
   401  	d.Phys = []string{}
   402  	d.Virt = []string{}
   403  	names := make(map[string]bool)
   404  
   405  	for i, node := range nodes {
   406  		if i%2 == 1 {
   407  			continue
   408  		}
   409  		matches := strings.Fields(node)
   410  		if len(matches) != 6 {
   411  			return errors.New("expinfo-output seems to have changed")
   412  		}
   413  		// Convert client-0:0 to client-0
   414  		name := strings.Split(matches[1], ":")[0]
   415  		ip := matches[2]
   416  
   417  		fullName := fmt.Sprintf("%s.%s.%s.isi.deterlab.net", name, d.Experiment, d.Project)
   418  		log.Lvl3("Discovered", fullName, "on ip", ip)
   419  
   420  		if _, exists := names[fullName]; !exists {
   421  			d.Phys = append(d.Phys, fullName)
   422  			d.Virt = append(d.Virt, ip)
   423  			names[fullName] = true
   424  		}
   425  	}
   426  
   427  	log.Lvl2("Physical:", d.Phys)
   428  	log.Lvl2("Internal:", d.Virt)
   429  	return nil
   430  }
   431  
   432  // Checks whether host, login and project are defined. If any of them are missing, it will
   433  // ask on the command-line.
   434  // For the login-variable, it will try to set up a connection to d.Host and copy over the
   435  // public key for a more easy communication
   436  func (d *Deterlab) loadAndCheckDeterlabVars() {
   437  	deter := Deterlab{}
   438  	err := onet.ReadTomlConfig(&deter, "deter.toml")
   439  	d.Host, d.Login, d.Project, d.Experiment, d.ProxyAddress, d.MonitorAddress =
   440  		deter.Host, deter.Login, deter.Project, deter.Experiment,
   441  		deter.ProxyAddress, deter.MonitorAddress
   442  
   443  	if err != nil {
   444  		log.Lvl1("Couldn't read config-file - asking for default values")
   445  	}
   446  
   447  	if d.Host == "" {
   448  		d.Host = readString("Please enter the hostname of deterlab", "users.deterlab.net")
   449  	}
   450  
   451  	login, err := user.Current()
   452  	log.ErrFatal(err)
   453  	if d.Login == "" {
   454  		d.Login = readString("Please enter the login-name on "+d.Host, login.Username)
   455  	}
   456  
   457  	if d.Project == "" {
   458  		d.Project = readString("Please enter the project on deterlab", "SAFER")
   459  	}
   460  
   461  	if d.Experiment == "" {
   462  		d.Experiment = readString("Please enter the Experiment on "+d.Project, "Dissent-CS")
   463  	}
   464  
   465  	if d.MonitorAddress == "" {
   466  		d.MonitorAddress = readString("Please enter the Monitor address (where clients will connect)", "users.isi.deterlab.net")
   467  	}
   468  	if d.ProxyAddress == "" {
   469  		d.ProxyAddress = readString("Please enter the proxy redirection address", "localhost")
   470  	}
   471  
   472  	onet.WriteTomlConfig(*d, "deter.toml")
   473  }
   474  
   475  // Shows a messages and reads in a string, eventually returning a default (dft) string
   476  func readString(msg, dft string) string {
   477  	fmt.Printf("%s [%s]:", msg, dft)
   478  
   479  	reader := bufio.NewReader(os.Stdin)
   480  	strnl, _ := reader.ReadString('\n')
   481  	str := strings.TrimSpace(strnl)
   482  	if str == "" {
   483  		return dft
   484  	}
   485  	return str
   486  }
   487  
// simulConnectionsConf is the content of the "simul.conf" file that Deploy
// writes into the deploy-directory. It looks like a limits.conf-style
// fragment raising the open-files limit for all users - where and how it is
// installed on the remote side is not visible here.
const simulConnectionsConf = `
# This is for the onet-deterlab testbed, which can use up an awful lot of connections

* soft nofile 128000
* hard nofile 128000
`