gopkg.in/dedis/onet.v2@v2.0.0-20181115163211-c8f3724038a7/simul/platform/deterlab.go (about) 1 // Deterlab is responsible for setting up everything to test the application 2 // on deterlab.net 3 // Given a list of hostnames, it will create an overlay 4 // tree topology, using all but the last node. It will create multiple 5 // nodes per server and run timestamping processes. The last node is 6 // reserved for the logging server, which is forwarded to localhost:8081 7 // 8 // Creates the following directory structure: 9 // build/ - where all cross-compiled executables are stored 10 // remote/ - directory to be copied to the deterlab server 11 // 12 // The following apps are used: 13 // deter - runs on the user-machine in deterlab and launches the others 14 // forkexec - runs on the other servers and launches the app, so it can measure its cpu usage 15 16 package platform 17 18 import ( 19 "bufio" 20 "errors" 21 "fmt" 22 "io/ioutil" 23 "os" 24 "os/exec" 25 "os/user" 26 "path" 27 "path/filepath" 28 "runtime" 29 "strconv" 30 "strings" 31 "sync" 32 "time" 33 34 "github.com/BurntSushi/toml" 35 "gopkg.in/dedis/onet.v2" 36 "gopkg.in/dedis/onet.v2/app" 37 "gopkg.in/dedis/onet.v2/log" 38 ) 39 40 // Deterlab holds all fields necessary for a Deterlab-run 41 type Deterlab struct { 42 // *** Deterlab-related configuration 43 // The login on the platform 44 Login string 45 // The outside host on the platform 46 Host string 47 // The name of the project 48 Project string 49 // Name of the Experiment - also name of hosts 50 Experiment string 51 // Directory holding the simulation-main file 52 simulDir string 53 // Directory where the deterlab-users-file is held 54 usersDir string 55 // Directory where everything is copied into 56 deployDir string 57 // Directory for building 58 buildDir string 59 // Directory holding all go-files of onet/simul/platform 60 platformDir string 61 // DNS-resolvable names 62 Phys []string 63 // VLAN-IP names (physical machines) 64 Virt []string 65 // Channel to communication stopping of experiment 66 sshDeter chan string 67 // Whether the simulation is started 68 started bool 69 70 // ProxyAddress : the proxy will redirect every traffic it 71 // receives to this address 72 ProxyAddress string 73 // MonitorAddress is the address given to clients to connect to the monitor 74 // It is actually the Proxy that will listen to that address and clients 75 // won't know a thing about it 76 MonitorAddress string 77 // Port number of the monitor and the proxy 78 MonitorPort int 79 80 // Number of available servers 81 Servers int 82 // Name of the simulation 83 Simulation string 84 // Number of machines 85 Hosts int 86 // Debugging-level: 0 is none - 5 is everything 87 Debug int 88 // RunWait for long simulations 89 RunWait string 90 // suite used for the simulation 91 Suite string 92 // PreScript defines a script that is run before the simulation 93 PreScript string 94 // Tags to use when compiling 95 Tags string 96 } 97 98 var simulConfig *onet.SimulationConfig 99 100 // Configure initialises the directories and loads the saved config 101 // for Deterlab 102 func (d *Deterlab) Configure(pc *Config) { 103 // Directory setup - would also be possible in /tmp 104 pwd, _ := os.Getwd() 105 d.Suite = pc.Suite 106 d.simulDir = pwd 107 d.deployDir = pwd + "/deploy" 108 d.buildDir = pwd + "/build" 109 _, file, _, _ := runtime.Caller(0) 110 d.platformDir = path.Dir(file) 111 os.RemoveAll(d.deployDir) 112 os.Mkdir(d.deployDir, 0770) 113 os.Mkdir(d.buildDir, 0770) 114 d.MonitorPort = pc.MonitorPort 115 log.Lvl3("Dirs are:", pwd, d.deployDir) 116 d.loadAndCheckDeterlabVars() 117 118 d.Debug = pc.Debug 119 if d.Simulation == "" { 120 log.Fatal("No simulation defined in runconfig") 121 } 122 123 // Setting up channel 124 d.sshDeter = make(chan string) 125 } 126 127 type pkg struct { 128 name string 129 processor string 130 system string 131 path string 132 } 133 134 // Build prepares all binaries for the Deterlab-simulation. 135 // If 'build' is empty, all binaries are created, else only 136 // the ones indicated. Either "simul" or "users" 137 func (d *Deterlab) Build(build string, arg ...string) error { 138 log.Lvl1("Building for", d.Login, d.Host, d.Project, build, "simulDir=", d.simulDir) 139 start := time.Now() 140 141 var wg sync.WaitGroup 142 143 if err := os.RemoveAll(d.buildDir); err != nil { 144 return err 145 } 146 if err := os.Mkdir(d.buildDir, 0777); err != nil { 147 return err 148 } 149 150 // start building the necessary binaries - it's always the same, 151 // but built for another architecture. 152 packages := []pkg{ 153 {"simul", "amd64", "linux", d.simulDir}, 154 {"users", "386", "freebsd", path.Join(d.platformDir, "deterlab_users")}, 155 } 156 if build == "" { 157 build = "simul,users" 158 } 159 var tags []string 160 if d.Tags != "" { 161 tags = append([]string{"-tags"}, strings.Split(d.Tags, " ")...) 162 } 163 log.Lvl3("Starting to build all executables", packages) 164 for _, p := range packages { 165 if !strings.Contains(build, p.name) { 166 log.Lvl2("Skipping build of", p.name) 167 continue 168 } 169 log.Lvl3("Building", p) 170 wg.Add(1) 171 go func(p pkg) { 172 defer wg.Done() 173 dst := path.Join(d.buildDir, p.name) 174 path, err := filepath.Rel(d.simulDir, p.path) 175 log.ErrFatal(err) 176 // deter has an amd64, linux architecture 177 var out string 178 if p.name == "simul" { 179 out, err = Build(path, dst, 180 p.processor, p.system, append(arg, tags...)...) 181 } else { 182 out, err = Build(path, dst, 183 p.processor, p.system, arg...) 184 } 185 if err != nil { 186 KillGo() 187 log.Lvl1(out) 188 log.Fatal(err) 189 } 190 }(p) 191 } 192 // wait for the build to finish 193 wg.Wait() 194 log.Lvl1("Build is finished after", time.Since(start)) 195 return nil 196 } 197 198 // Cleanup kills all eventually remaining processes from the last Deploy-run 199 func (d *Deterlab) Cleanup() error { 200 // Cleanup eventual ssh from the proxy-forwarding to the logserver 201 err := exec.Command("pkill", "-9", "-f", "ssh -nNTf").Run() 202 if err != nil { 203 log.Lvl3("Error stopping ssh:", err) 204 } 205 206 // SSH to the deterlab-server and end all running users-processes 207 log.Lvl3("Going to kill everything") 208 var sshKill chan string 209 sshKill = make(chan string) 210 go func() { 211 // Cleanup eventual residues of previous round - users and sshd 212 if _, err := SSHRun(d.Login, d.Host, "killall -9 users sshd"); err != nil { 213 log.Lvl3("Error while cleaning up:", err) 214 } 215 216 err := SSHRunStdout(d.Login, d.Host, "test -f remote/users && ( cd remote; ./users -kill )") 217 if err != nil { 218 log.Lvl1("NOT-Normal error from cleanup") 219 sshKill <- "error" 220 } 221 sshKill <- "stopped" 222 }() 223 224 for { 225 select { 226 case msg := <-sshKill: 227 if msg == "stopped" { 228 log.Lvl3("Users stopped") 229 return nil 230 } 231 log.Lvl2("Received other command", msg, "probably the app didn't quit correctly") 232 case <-time.After(time.Second * 20): 233 log.Lvl3("Timeout error when waiting for end of ssh") 234 return nil 235 } 236 } 237 } 238 239 // Deploy creates the appropriate configuration-files and copies everything to the 240 // deterlab-installation. 241 func (d *Deterlab) Deploy(rc *RunConfig) error { 242 if err := os.RemoveAll(d.deployDir); err != nil { 243 return err 244 } 245 if err := os.Mkdir(d.deployDir, 0777); err != nil { 246 return err 247 } 248 249 // Check for PreScript and copy it to the deploy-dir 250 d.PreScript = rc.Get("PreScript") 251 if d.PreScript != "" { 252 _, err := os.Stat(d.PreScript) 253 if !os.IsNotExist(err) { 254 if err := app.Copy(d.deployDir, d.PreScript); err != nil { 255 return err 256 } 257 } 258 } 259 260 // deploy will get rsync to /remote on the NFS 261 262 log.Lvl2("Localhost: Deploying and writing config-files") 263 sim, err := onet.NewSimulation(d.Simulation, string(rc.Toml())) 264 if err != nil { 265 return err 266 } 267 // Initialize the deter-struct with our current structure (for debug-levels 268 // and such), then read in the app-configuration to overwrite eventual 269 // 'Machines', 'ppm', '' or other fields 270 deter := *d 271 deterConfig := d.deployDir + "/deter.toml" 272 _, err = toml.Decode(string(rc.Toml()), &deter) 273 if err != nil { 274 return err 275 } 276 log.Lvl3("Creating hosts") 277 deter.createHosts() 278 log.Lvl3("Writing the config file :", deter) 279 onet.WriteTomlConfig(deter, deterConfig, d.deployDir) 280 281 simulConfig, err = sim.Setup(d.deployDir, deter.Virt) 282 if err != nil { 283 return err 284 } 285 simulConfig.Config = string(rc.Toml()) 286 log.Lvl3("Saving configuration") 287 if err := simulConfig.Save(d.deployDir); err != nil { 288 log.Error("Couldn't save configuration:", err) 289 } 290 291 // Copy limit-files for more connections 292 ioutil.WriteFile(path.Join(d.deployDir, "simul.conf"), 293 []byte(simulConnectionsConf), 0444) 294 295 // Copying build-files to deploy-directory 296 build, err := ioutil.ReadDir(d.buildDir) 297 for _, file := range build { 298 err = exec.Command("cp", d.buildDir+"/"+file.Name(), d.deployDir).Run() 299 if err != nil { 300 log.Fatal("error copying build-file:", d.buildDir, file.Name(), d.deployDir, err) 301 } 302 } 303 304 // Copy everything over to Deterlab 305 log.Lvl1("Copying over to", d.Login, "@", d.Host) 306 err = Rsync(d.Login, d.Host, d.deployDir+"/", "remote/") 307 if err != nil { 308 log.Fatal(err) 309 } 310 log.Lvl2("Done copying") 311 312 return nil 313 } 314 315 // Start creates a tunnel for the monitor-output and contacts the Deterlab- 316 // server to run the simulation 317 func (d *Deterlab) Start(args ...string) error { 318 // setup port forwarding for viewing log server 319 d.started = true 320 // Remote tunneling : the sink port is used both for the sink and for the 321 // proxy => the proxy redirects packets to the same port the sink is 322 // listening. 323 // -n = stdout == /Dev/null, -N => no command stream, -T => no tty 324 redirection := strconv.Itoa(d.MonitorPort) + ":" + d.ProxyAddress + ":" + strconv.Itoa(d.MonitorPort) 325 cmd := []string{"-nNTf", "-o", "StrictHostKeyChecking=no", "-o", "ExitOnForwardFailure=yes", "-R", 326 redirection, fmt.Sprintf("%s@%s", d.Login, d.Host)} 327 exCmd := exec.Command("ssh", cmd...) 328 if err := exCmd.Start(); err != nil { 329 log.Fatal("Failed to start the ssh port forwarding:", err) 330 } 331 if err := exCmd.Wait(); err != nil { 332 log.Fatal("ssh port forwarding exited in failure:", err) 333 } 334 log.Lvl3("Setup remote port forwarding", cmd) 335 go func() { 336 err := SSHRunStdout(d.Login, d.Host, "cd remote; GOMAXPROCS=8 ./users -suite="+d.Suite) 337 if err != nil { 338 log.Lvl3(err) 339 } 340 d.sshDeter <- "finished" 341 }() 342 343 return nil 344 } 345 346 // Wait for the process to finish 347 func (d *Deterlab) Wait() error { 348 wait, err := time.ParseDuration(d.RunWait) 349 if err != nil || wait == 0 { 350 wait = 600 * time.Second 351 err = nil 352 } 353 if d.started { 354 log.Lvl3("Simulation is started") 355 select { 356 case msg := <-d.sshDeter: 357 if msg == "finished" { 358 log.Lvl3("Received finished-message, not killing users") 359 return nil 360 } 361 log.Lvl1("Received out-of-line message", msg) 362 case <-time.After(wait): 363 log.Lvl1("Quitting after waiting", wait) 364 d.started = false 365 } 366 d.started = false 367 } 368 return nil 369 } 370 371 // Write the hosts.txt file automatically 372 // from project name and number of servers 373 func (d *Deterlab) createHosts() { 374 // Query deterlab's API for servers 375 log.Lvl2("Querying Deterlab's API to retrieve server names and addresses") 376 command := fmt.Sprintf("/usr/testbed/bin/expinfo -l -e %s,%s", d.Project, d.Experiment) 377 apiReply, err := SSHRun(d.Login, d.Host, command) 378 if err != nil { 379 log.Fatal("Error while querying Deterlab:", err) 380 } 381 log.ErrFatal(d.parseHosts(string(apiReply))) 382 } 383 384 func (d *Deterlab) parseHosts(str string) error { 385 // Get the link-information, which is the second block in `expinfo`-output 386 infos := strings.Split(str, "\n\n") 387 if len(infos) < 2 { 388 return errors.New("didn't recognize output of 'expinfo'") 389 } 390 linkInfo := infos[1] 391 // Test for correct version in case the API-output changes 392 if !strings.HasPrefix(linkInfo, "Virtual Lan/Link Info:") { 393 return errors.New("didn't recognize output of 'expinfo'") 394 } 395 linkLines := strings.Split(linkInfo, "\n") 396 if len(linkLines) < 5 { 397 return errors.New("didn't recognice output of 'expinfo'") 398 } 399 nodes := linkLines[3:] 400 401 d.Phys = []string{} 402 d.Virt = []string{} 403 names := make(map[string]bool) 404 405 for i, node := range nodes { 406 if i%2 == 1 { 407 continue 408 } 409 matches := strings.Fields(node) 410 if len(matches) != 6 { 411 return errors.New("expinfo-output seems to have changed") 412 } 413 // Convert client-0:0 to client-0 414 name := strings.Split(matches[1], ":")[0] 415 ip := matches[2] 416 417 fullName := fmt.Sprintf("%s.%s.%s.isi.deterlab.net", name, d.Experiment, d.Project) 418 log.Lvl3("Discovered", fullName, "on ip", ip) 419 420 if _, exists := names[fullName]; !exists { 421 d.Phys = append(d.Phys, fullName) 422 d.Virt = append(d.Virt, ip) 423 names[fullName] = true 424 } 425 } 426 427 log.Lvl2("Physical:", d.Phys) 428 log.Lvl2("Internal:", d.Virt) 429 return nil 430 } 431 432 // Checks whether host, login and project are defined. If any of them are missing, it will 433 // ask on the command-line. 434 // For the login-variable, it will try to set up a connection to d.Host and copy over the 435 // public key for a more easy communication 436 func (d *Deterlab) loadAndCheckDeterlabVars() { 437 deter := Deterlab{} 438 err := onet.ReadTomlConfig(&deter, "deter.toml") 439 d.Host, d.Login, d.Project, d.Experiment, d.ProxyAddress, d.MonitorAddress = 440 deter.Host, deter.Login, deter.Project, deter.Experiment, 441 deter.ProxyAddress, deter.MonitorAddress 442 443 if err != nil { 444 log.Lvl1("Couldn't read config-file - asking for default values") 445 } 446 447 if d.Host == "" { 448 d.Host = readString("Please enter the hostname of deterlab", "users.deterlab.net") 449 } 450 451 login, err := user.Current() 452 log.ErrFatal(err) 453 if d.Login == "" { 454 d.Login = readString("Please enter the login-name on "+d.Host, login.Username) 455 } 456 457 if d.Project == "" { 458 d.Project = readString("Please enter the project on deterlab", "SAFER") 459 } 460 461 if d.Experiment == "" { 462 d.Experiment = readString("Please enter the Experiment on "+d.Project, "Dissent-CS") 463 } 464 465 if d.MonitorAddress == "" { 466 d.MonitorAddress = readString("Please enter the Monitor address (where clients will connect)", "users.isi.deterlab.net") 467 } 468 if d.ProxyAddress == "" { 469 d.ProxyAddress = readString("Please enter the proxy redirection address", "localhost") 470 } 471 472 onet.WriteTomlConfig(*d, "deter.toml") 473 } 474 475 // Shows a messages and reads in a string, eventually returning a default (dft) string 476 func readString(msg, dft string) string { 477 fmt.Printf("%s [%s]:", msg, dft) 478 479 reader := bufio.NewReader(os.Stdin) 480 strnl, _ := reader.ReadString('\n') 481 str := strings.TrimSpace(strnl) 482 if str == "" { 483 return dft 484 } 485 return str 486 } 487 488 const simulConnectionsConf = ` 489 # This is for the onet-deterlab testbed, which can use up an awful lot of connections 490 491 * soft nofile 128000 492 * hard nofile 128000 493 `