github.com/DARA-Project/GoDist-Scheduler@v0.0.0-20201030134746-668de4acea0d/overlord/overlord.go (about) 1 package main 2 3 import ( 4 "encoding/json" 5 "errors" 6 "flag" 7 "fmt" 8 "github.com/DARA-Project/GoDist-Scheduler/instrumenter" 9 "io" 10 "io/ioutil" 11 "log" 12 "net" 13 "net/rpc" 14 "os" 15 "os/exec" 16 "path/filepath" 17 "strconv" 18 "strings" 19 "time" 20 ) 21 22 var ( 23 l* log.Logger 24 ) 25 26 //Options struct which configures the Dara run 27 //This stores the parsed config results 28 type Options struct { 29 Exec ExecOptions `json:"exec"` 30 Instr InstrumentOptions `json:"instr"` 31 Bench BenchOptions `json:"bench"` 32 } 33 34 //Custom build & run script options 35 type BuildOptions struct { 36 BuildScript string `json:"build_path"` 37 RunScript string `json:"run_path"` 38 CleanScript string `json:"clean_path"` // CleanScript cleans the environment before every restart 39 } 40 41 //Dara execution options 42 type ExecOptions struct { 43 Path string `json:"path"` 44 SharedMemSize string `json:"size"` 45 NumProcesses int `json:"processes"` 46 SchedFile string `json:"sched"` 47 LogLevel string `json:"loglevel"` 48 Build BuildOptions `json:"build"` 49 PreloadReplay bool `json:"fast_replay"` 50 PropertyFile string `json:"property_file"` 51 BlocksFile string `json:"blocks_file"` 52 Strategy string `json:"strategy"` 53 Microbenchmark bool `json:"microbench"` 54 Nanobenchmark bool `json:"nanobench"` 55 MaxDepth int `json:"maxdepth"` 56 MaxRuns int `json:"maxruns"` 57 } 58 59 //Options specific for benchmarking 60 type BenchOptions struct { 61 Outfile string `json:"path"` 62 Iterations int `json:"iter"` 63 } 64 65 //Options specific for instrumentation 66 type InstrumentOptions struct { 67 Dir string `json:"dir"` 68 File string `json:"file"` 69 OutDir string `json:"outdir"` 70 OutFile string `json:"outfile"` 71 BlocksFile string `json:"blocks_file"` 72 } 73 74 type DaraRpcServer struct { 75 Options ExecOptions 76 logger *log.Logger 77 } 78 79 //Returns the directory from the path 80 func get_directory_from_path(path string) string { 81 return filepath.Dir(path) 82 } 83 84 func write_blocks_file(filename string, blocks []string) error { 85 f, err := os.Create(filename) 86 if err != nil { 87 return err 88 } 89 defer f.Close() 90 for _, block := range blocks { 91 _, err = f.WriteString(block + "\n") 92 if err != nil { 93 return err 94 } 95 } 96 return nil 97 } 98 99 //Instruments a given file using Dinv's capture module 100 func instrument_file(filename string, outfile string) ([]string, error) { 101 f, err := instrumenter.Annotate(filename) 102 if err != nil { 103 return []string{}, err 104 } 105 if outfile == "" { 106 l.Println("Output file not provided; overwriting original file") 107 outfile = filename 108 } 109 return f.GetBlockIDs(), f.WriteAnnotatedFile(outfile) 110 } 111 112 //Instruments all go files in a directory 113 func instrument_dir(directory string, outdir string, blocks_file string) error { 114 if outdir == "" { 115 l.Println("Output directory not provided; overwriting original directory") 116 outdir = directory 117 } 118 var allBlocks []string 119 err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error { 120 if err != nil { 121 log.Println(err) 122 } 123 if strings.Contains(path, "vendor/") { 124 return nil 125 } 126 if !info.IsDir() && filepath.Ext(path) == ".go" { 127 outpath := strings.Replace(path, directory, outdir, -1) 128 err = os.MkdirAll(filepath.Dir(outpath), 0777) 129 if err != nil { 130 return err 131 } 132 blocks, err := instrument_file(path, outpath) 133 allBlocks = append(allBlocks, blocks...) 134 return err 135 } 136 return nil 137 }) 138 if err != nil { 139 log.Println(err) 140 } 141 err = write_blocks_file(blocks_file, allBlocks) 142 log.Println("Total number of blocks is", len(allBlocks)) 143 return err 144 } 145 146 //Sets the Program name as $PROGRAM for use in exec script 147 func set_environment(program string) { 148 // Set the Program name here as PROGRAM 149 os.Setenv("PROGRAM", program) 150 } 151 152 //Sets the $RUN_SCRIPT variable to the value provided in config file 153 func set_env_run_script(script string) { 154 // Set the run script as RUN_SCRIPT 155 os.Setenv("RUN_SCRIPT", script) 156 } 157 158 //Sets the $PROP_FILE variable to the value provided in config file 159 func set_env_property_file(filepath string) { 160 // Set the property file as PROP_FILE 161 os.Setenv("PROP_FILE", filepath) 162 } 163 164 //Sets the Fast replay option where the replay works from a loaded schedule 165 func set_fast_replay() { 166 os.Setenv("FAST_REPLAY", "true") 167 } 168 169 //Sets the log level for the entire Dara run 170 func set_log_level(loglevel string) error { 171 level := "" 172 switch loglevel { 173 case "DEBUG": 174 level = "0" 175 case "INFO": 176 level = "1" 177 case "WARN": 178 level = "2" 179 case "FATAL": 180 level = "3" 181 case "OFF": 182 level = "4" 183 default: 184 return errors.New("Invalid log level specified in configuration file") 185 } 186 os.Setenv("DARA_LOG_LEVEL", level) 187 return nil 188 } 189 190 //Sets the Dara mode environment variable 191 func set_dara_mode(mode string) { 192 os.Setenv("DARA_MODE", mode) 193 } 194 195 //Sets Nanobenchmark enivornment variable 196 func set_nanobenchmark() { 197 os.Setenv("NANOBENCH", "true") 198 } 199 200 //Set Microbenchmark environment variable 201 func set_microbenchmark() { 202 os.Setenv("UBENCH", "true") 203 } 204 205 //Generic function for copying file from src to dst 206 func copy_file(src string, dst string) error { 207 in, err := os.Open(src) 208 if err != nil { 209 return err 210 } 211 defer in.Close() 212 213 out, err := os.Create(dst) 214 if err != nil { 215 return err 216 } 217 defer out.Close() 218 219 _, err = io.Copy(out, in) 220 if err != nil { 221 return err 222 } 223 return out.Close() 224 } 225 226 //Installs the global scheduler 227 func install_global_scheduler() error { 228 cmd := exec.Command("/usr/bin/dgo", "install", "github.com/DARA-Project/GoDist-Scheduler") 229 cmd.Stdout = os.Stdout 230 cmd.Stderr = os.Stderr 231 err := cmd.Run() 232 if err != nil { 233 fmt.Println("Failed to install global scheduler : ", err) 234 } 235 return err 236 } 237 238 //Launches the global scheduler and the run script to run the system 239 func launch_global_scheduler(mode string, numProcs int, sched_file string, strategy string, maxdepth int, maxruns int) (*exec.Cmd, error) { 240 arg := "--" + mode + "=true --procs=" + strconv.Itoa(numProcs) + " --schedule=" + sched_file 241 if strategy != "" { 242 arg += " --strategy=" + strategy 243 } 244 if maxdepth != 0 { 245 arg += " --maxdepth=" + strconv.Itoa(maxdepth) 246 } 247 if maxruns != 0 { 248 arg += " --maxruns=" + strconv.Itoa(maxruns) 249 } 250 cmd := exec.Command("/bin/bash", "./exec_script.sh", arg) 251 cmd.Stdout = os.Stdout 252 cmd.Stderr = os.Stderr 253 err := cmd.Start() 254 return cmd, err 255 } 256 257 //Starts running the go benchmark 258 func start_go_benchmark() (*exec.Cmd, error) { 259 cmd := exec.Command("/bin/bash", "./bench_script.sh") 260 cmd.Stdout = os.Stdout 261 cmd.Stderr = os.Stderr 262 err := cmd.Start() 263 return cmd, err 264 } 265 266 //Starts the global scheduler for this dara run 267 func start_global_scheduler(mode string, numProcs int, sched_file string, strategy string, maxdepth int, maxruns int) (*exec.Cmd, error) { 268 err := install_global_scheduler() 269 if err != nil { 270 return nil, err 271 } 272 cmd, err := launch_global_scheduler(mode, numProcs, sched_file, strategy, maxdepth, maxruns) 273 if err != nil { 274 return nil, err 275 } 276 return cmd, err 277 } 278 279 //Setup the shared memory to be used by the global and local schedulers 280 func setup_shared_mem(size string, dir string) error { 281 // Remove existing shared memory 282 path := dir + "/DaraSharedMem" 283 err := os.Remove(path) 284 if err != nil { 285 // Ignore if shared memory didn't exist 286 err = nil 287 } 288 // Get shared memor from device 0 289 outputFileArg := "of=" + path 290 blockSize := "bs=" + size 291 cmd := exec.Command("dd", "if=/dev/zero", outputFileArg, blockSize, "count=1") 292 cmd.Stdout = os.Stdout 293 cmd.Stderr = os.Stderr 294 err = cmd.Run() 295 if err != nil { 296 return err 297 } 298 // Change permissions of shared memory 299 err = os.Chmod(path, 0777) 300 return err 301 } 302 303 //Execute the build script to build the program 304 func execute_build_script(script string, execution_dir string) error { 305 cmd := exec.Command(script) 306 cmd.Stdout = os.Stdout 307 cmd.Stderr = os.Stderr 308 err := cmd.Run() 309 if err != nil { 310 return err 311 } 312 l.Println("Finished building using build script") 313 err = os.Chdir(execution_dir) 314 return err 315 } 316 317 func execute_clean_script(script string) error { 318 cmd := exec.Command(script) 319 cmd.Stdout = os.Stdout 320 cmd.Stderr = os.Stderr 321 err := cmd.Run() 322 if err != nil { 323 return err 324 } 325 return err 326 } 327 328 //Build the program executable using vanilla dgo 329 func build_target_program(dir string) error { 330 err := os.Chdir(dir) 331 if err != nil { 332 return err 333 } 334 cmd := exec.Command("dgo", "build", "-v") 335 cmd.Stdout = os.Stdout 336 cmd.Stderr = os.Stderr 337 err = cmd.Run() 338 return err 339 } 340 341 //Build the program executable using vanilla go 342 func build_target_program_go(dir string) error { 343 err := os.Chdir(dir) 344 if err != nil { 345 return err 346 } 347 cmd := exec.Command("go", "build", "-v") 348 cmd.Stdout = os.Stdout 349 cmd.Stderr = os.Stderr 350 err = cmd.Run() 351 return err 352 } 353 354 //Copies the execution script to the directory that contains the run script/executable 355 func copy_launch_script(dir string) error { 356 cwd, err := os.Getwd() 357 if err != nil { 358 return err 359 } 360 log.Println("Copying exec script from", cwd, " to ", dir) 361 err = copy_file(cwd+"/exec_script.sh", dir+"/exec_script.sh") 362 return err 363 } 364 365 //Copies the benchmarking script to the directory that contains the run script/executable 366 func copy_bench_script(dir string) error { 367 cwd, err := os.Getwd() 368 if err != nil { 369 return err 370 } 371 err = copy_file(cwd+"/bench_script.sh", dir+"/bench_script.sh") 372 return err 373 } 374 375 //Initial handler for instrumentation mode 376 func instrument(options InstrumentOptions) error { 377 if options.File == "" && options.Dir == "" { 378 return errors.New("Instrument must have only one option(file or dir) selected.") 379 } 380 381 if options.BlocksFile == "" { 382 return errors.New("Argument not provided for filename for the list of blocks") 383 } 384 385 if options.File != "" { 386 blocks, err := instrument_file(options.File, options.OutFile) 387 // Write blocks to the blocks file 388 if err != nil { 389 return err 390 } 391 err = write_blocks_file(options.BlocksFile,blocks) 392 return err 393 } 394 395 if options.Dir != "" { 396 return instrument_dir(options.Dir, options.OutDir, options.BlocksFile) 397 } 398 399 return nil 400 } 401 402 func (d * DaraRpcServer) killprogram() error { 403 dir := get_directory_from_path(d.Options.Path) 404 program := filepath.Base(dir) 405 if d.Options.Build.RunScript == "" { 406 cmd := exec.Command("pkill", program) 407 err := cmd.Run() 408 if err != nil { 409 // This "error" means the program ended before we could kill it 410 //d.logger.Println("Error while killing program", err) 411 return err 412 } 413 } else { 414 cmd := exec.Command("pkill", d.Options.Build.RunScript) 415 err := cmd.Run() 416 if err != nil { 417 // This "error" means the program ended before we could kill it 418 //d.logger.Println("Error while killing program", err) 419 return err 420 } 421 } 422 return nil 423 } 424 425 func (d * DaraRpcServer) KillExecution(unused_arg int, ack *bool) error { 426 // Issue a kill command for killing the program under test 427 err := d.killprogram() 428 if err != nil { 429 d.logger.Println("Failed to kill program") 430 return err 431 } 432 *ack = true 433 return nil 434 } 435 436 func (d * DaraRpcServer) RestartExecution(unused_arg int, ack * bool) error { 437 if d.Options.Build.CleanScript != "" { 438 // Run the environment clean script between runs 439 err := execute_clean_script(d.Options.Build.CleanScript) 440 d.logger.Println("Failed to clean environment", err) 441 } 442 f, err := os.Create("./explore_restart") 443 if err != nil { 444 d.logger.Println("Failed to finish exploration") 445 return err 446 } 447 f.Close() 448 return nil 449 } 450 451 func (d * DaraRpcServer) FinishExecution(unused_arg int, ack *bool) error { 452 // Issue a finish command to the exec script somehow 453 cwd, err := os.Getwd() 454 if err != nil { 455 d.logger.Println("Error while getting current directory", err) 456 return err 457 } 458 d.logger.Println("Current Directory:", cwd) 459 // Just create a file that is called explore_finish to signify end of exploration and the exec script can just stat if the file exists 460 f, err := os.Create("./explore_finish") 461 if err != nil { 462 d.logger.Println("Failed to finish exploration") 463 return err 464 } 465 f.Close() 466 err = os.Chdir(cwd) 467 if err != nil { 468 d.logger.Println("Error while changing directory") 469 return err 470 } 471 // Now we are ready to kill the program! 472 err = d.killprogram() 473 if err != nil { 474 l.Println("[Overlord-RpcServer] Failed to kill program") 475 return err 476 } 477 *ack = true 478 return nil 479 } 480 481 func init_rpc_server(options ExecOptions) *DaraRpcServer{ 482 server := DaraRpcServer{options, log.New(os.Stdout, "[Overlord-RpcServer]", log.Lshortfile)} 483 return &server 484 } 485 486 func start_rpc_server(options ExecOptions) { 487 addr, err := net.ResolveTCPAddr("tcp", "0.0.0.0:45000") 488 // TODO: Maybe these errors should really be fatal errors 489 if err != nil { 490 l.Println("[Overlord] Failed to resolve TCP address for RPC Server") 491 return 492 } 493 inbound, err := net.ListenTCP("tcp", addr) 494 if err != nil { 495 l.Println("[Overlord] Failed to initialize inbound listener for RPC Server") 496 return 497 } 498 499 server := init_rpc_server(options) 500 rpc.Register(server) 501 rpc.Accept(inbound) 502 } 503 504 //Sets up the environment and scripts for benchmarking go programs 505 func go_setup(options ExecOptions) error { 506 dir := get_directory_from_path(options.Path) 507 err := copy_bench_script(dir) 508 if err != nil { 509 return err 510 } 511 err = build_target_program(dir) 512 if err != nil { 513 return err 514 } 515 set_environment(filepath.Base(dir)) 516 return nil 517 } 518 519 //Sets up the environment and scripts for running Dara 520 func setup(options ExecOptions, mode string) error { 521 dir := get_directory_from_path(options.Path) 522 set_dara_mode(mode) 523 err := set_log_level(options.LogLevel) 524 if err != nil { 525 return err 526 } 527 if options.Nanobenchmark { 528 set_nanobenchmark() 529 } 530 if options.Microbenchmark { 531 set_microbenchmark() 532 } 533 err = copy_launch_script(dir) 534 if err != nil { 535 return err 536 } 537 err = setup_shared_mem(options.SharedMemSize, dir) 538 if err != nil { 539 return err 540 } 541 build_script := options.Build.BuildScript 542 if build_script == "" { 543 err = build_target_program(dir) 544 if err != nil { 545 return err 546 } 547 } else { 548 err = execute_build_script(build_script, dir) 549 if err != nil { 550 return err 551 } 552 set_env_run_script(options.Build.RunScript) 553 } 554 set_environment(filepath.Base(dir)) 555 set_env_property_file(options.PropertyFile) 556 return nil 557 } 558 559 //Handler for recording executions 560 func record(options ExecOptions) error { 561 err := setup(options, "record") 562 if err != nil { 563 return err 564 } 565 cmd, err := start_global_scheduler("record", options.NumProcesses, options.SchedFile, "", 0, 0) 566 if err != nil { 567 return err 568 } 569 err = cmd.Wait() 570 return err 571 } 572 573 //Handler for replaying executions 574 func replay(options ExecOptions) error { 575 err := setup(options, "replay") 576 if err != nil { 577 return err 578 } 579 if options.PreloadReplay { 580 set_fast_replay() 581 } 582 cmd, err := start_global_scheduler("replay", options.NumProcesses, options.SchedFile, "", 0, 0) 583 if err != nil { 584 return err 585 } 586 err = cmd.Wait() 587 return err 588 } 589 590 func post_exploration_cleanup(options ExecOptions) error { 591 // Remove the explore_finish file which was used to terminate the exec_script 592 return os.Remove("./explore_finish") 593 } 594 595 //Handler for exploring state space of a program 596 func explore(options ExecOptions) error { 597 err := setup(options, "explore") 598 if err != nil { 599 return err 600 } 601 go start_rpc_server(options) 602 cmd, err := start_global_scheduler("explore", options.NumProcesses, options.SchedFile, options.Strategy, options.MaxDepth, options.MaxRuns) 603 if err != nil { 604 return err 605 } 606 err = cmd.Wait() 607 if err != nil { 608 return err 609 } 610 err = post_exploration_cleanup(options) 611 return err 612 } 613 614 //Handler for benchmarking between go and dgo 615 func bench(options ExecOptions, bOptions BenchOptions) error { 616 NUM_ITERATIONS := bOptions.Iterations 617 normal_vals := make([]float64, NUM_ITERATIONS) 618 record_vals := make([]float64, NUM_ITERATIONS) 619 replay_vals := make([]float64, NUM_ITERATIONS) 620 fast_replay_vals := make([]float64, NUM_ITERATIONS) 621 cwd, err := os.Getwd() 622 if err != nil { 623 return err 624 } 625 err = go_setup(options) 626 if err != nil { 627 return err 628 } 629 for i := 0; i < NUM_ITERATIONS; i++ { 630 fmt.Println("Normal Iteration #", i) 631 start := time.Now() 632 cmd, err := start_go_benchmark() 633 if err != nil { 634 return err 635 } 636 err = cmd.Wait() 637 normal_vals[i] = time.Since(start).Seconds() 638 if err != nil { 639 return err 640 } 641 } 642 //os.Setenv("BENCH_RECORD", "true") 643 for i := 0; i < NUM_ITERATIONS; i++ { 644 // Reset working directory 645 err = os.Chdir(cwd) 646 if err != nil { 647 return err 648 } 649 err = setup(options, "record") 650 if err != nil { 651 return err 652 } 653 fmt.Println("Record Iteration #", i) 654 start := time.Now() 655 cmd, err := start_global_scheduler("record", options.NumProcesses, options.SchedFile, "", 0, 0) 656 if err != nil { 657 return err 658 } 659 err = cmd.Wait() 660 record_vals[i] = time.Since(start).Seconds() 661 if err != nil { 662 return err 663 } 664 //dat, err := ioutil.ReadFile("record.tmp") 665 //if err != nil { 666 // return err 667 //} 668 //record_time, err := strconv.ParseFloat(strings.TrimSpace(string(dat)), 64) 669 //if err != nil { 670 // return err 671 //} 672 //record_vals[i] = record_time 673 if err != nil { 674 return err 675 } 676 } 677 //os.Unsetenv("BENCH_RECORD") 678 for i := 0; i < NUM_ITERATIONS; i++ { 679 // Reset working directory 680 err = os.Chdir(cwd) 681 if err != nil { 682 return err 683 } 684 err = setup(options, "replay") 685 if err != nil { 686 return err 687 } 688 fmt.Println("Replay Iteration #", i) 689 start := time.Now() 690 cmd, err := start_global_scheduler("replay", options.NumProcesses, options.SchedFile, "", 0, 0) 691 if err != nil { 692 return err 693 } 694 err = cmd.Wait() 695 replay_vals[i] = time.Since(start).Seconds() 696 if err != nil { 697 return err 698 } 699 } 700 if options.PreloadReplay { 701 for i := 0; i < NUM_ITERATIONS; i++ { 702 err = os.Chdir(cwd) 703 if err != nil { 704 return err 705 } 706 err = setup(options, "replay") 707 if err != nil { 708 return err 709 } 710 set_fast_replay() 711 fmt.Println("Fast Replay Iteration #", i) 712 start := time.Now() 713 cmd, err := start_global_scheduler("replay", options.NumProcesses, options.SchedFile, "", 0, 0) 714 if err != nil { 715 return err 716 } 717 err = cmd.Wait() 718 fast_replay_vals[i] = time.Since(start).Seconds() 719 if err != nil { 720 return err 721 } 722 } 723 } 724 f, err := os.Create(bOptions.Outfile) 725 if err != nil { 726 return err 727 } 728 defer f.Close() 729 header_string := "Normal,Record,Replay" 730 if options.PreloadReplay { 731 header_string += ",Fast_Replay" 732 } 733 _, err = f.WriteString(header_string + "\n") 734 if err != nil { 735 return err 736 } 737 for i := 0; i < NUM_ITERATIONS; i++ { 738 val0 := normal_vals[i] 739 val1 := record_vals[i] 740 val2 := replay_vals[i] 741 s := fmt.Sprintf("%f,%f,%f", val0, val1, val2) 742 if options.PreloadReplay { 743 val3 := fast_replay_vals[i] 744 s = fmt.Sprintf("%s,%f", s, val3) 745 } 746 _, err = f.WriteString(s + "\n") 747 if err != nil { 748 return err 749 } 750 } 751 return nil 752 } 753 754 //Parse the config file provided by command line 755 func parse_options(optionsFile string) (options Options, err error) { 756 file, err := os.Open(optionsFile) 757 if err != nil { 758 return options, err 759 } 760 defer file.Close() 761 bytes, err := ioutil.ReadAll(file) 762 if err != nil { 763 return options, err 764 } 765 json.Unmarshal(bytes, &options) 766 return options, nil 767 } 768 769 func main() { 770 modePtr := flag.String("mode", "", "The action that needs to be performed : record, replay, explore, instrument, benchmark") 771 filePtr := flag.String("optFile", "", "json file containing the configuration options") 772 773 flag.Parse() 774 775 l = log.New(os.Stdout, "[Overlord]", log.Lshortfile) 776 777 if *modePtr == "" || *filePtr == "" { 778 l.Fatal("Usage : go run overlord.go -mode=[record,replay,explore,instrument] -optFile=<path_to_options_file>") 779 } 780 781 options, err := parse_options(*filePtr) 782 if err != nil { 783 l.Fatal(err) 784 } 785 786 if *modePtr == "instrument" { 787 err := instrument(options.Instr) 788 if err != nil { 789 l.Fatal("Failed to instrument file : ", err) 790 } 791 } else if *modePtr == "record" { 792 err := record(options.Exec) 793 if err != nil { 794 l.Fatal("Failed to record execution : ", err) 795 } 796 } else if *modePtr == "replay" { 797 err := replay(options.Exec) 798 if err != nil { 799 l.Fatal("Failed to replay execution : ", err) 800 } 801 } else if *modePtr == "explore" { 802 err := explore(options.Exec) 803 if err != nil { 804 l.Fatal("Failed to explore : ", err) 805 } 806 } else if *modePtr == "bench" { 807 err := bench(options.Exec, options.Bench) 808 if err != nil { 809 l.Fatal("Failed to bench : ", err) 810 } 811 } else { 812 l.Fatal("Invalid mode") 813 } 814 }