github.com/Cloud-Foundations/Dominator@v0.3.4/cmd/subd/main.go (about) 1 package main 2 3 import ( 4 "flag" 5 "fmt" 6 "io" 7 "os" 8 "os/signal" 9 "path" 10 "runtime" 11 "strconv" 12 "strings" 13 "syscall" 14 15 "github.com/Cloud-Foundations/Dominator/lib/constants" 16 "github.com/Cloud-Foundations/Dominator/lib/cpulimiter" 17 "github.com/Cloud-Foundations/Dominator/lib/filter" 18 "github.com/Cloud-Foundations/Dominator/lib/flags/loadflags" 19 "github.com/Cloud-Foundations/Dominator/lib/flagutil" 20 "github.com/Cloud-Foundations/Dominator/lib/format" 21 "github.com/Cloud-Foundations/Dominator/lib/fsbench" 22 "github.com/Cloud-Foundations/Dominator/lib/fsrateio" 23 "github.com/Cloud-Foundations/Dominator/lib/goroutine" 24 "github.com/Cloud-Foundations/Dominator/lib/html" 25 "github.com/Cloud-Foundations/Dominator/lib/log/serverlogger" 26 "github.com/Cloud-Foundations/Dominator/lib/memstats" 27 "github.com/Cloud-Foundations/Dominator/lib/netspeed" 28 "github.com/Cloud-Foundations/Dominator/lib/rateio" 29 "github.com/Cloud-Foundations/Dominator/lib/srpc" 30 "github.com/Cloud-Foundations/Dominator/lib/srpc/setupserver" 31 "github.com/Cloud-Foundations/Dominator/lib/wsyscall" 32 "github.com/Cloud-Foundations/Dominator/proto/sub" 33 "github.com/Cloud-Foundations/Dominator/sub/httpd" 34 "github.com/Cloud-Foundations/Dominator/sub/rpcd" 35 "github.com/Cloud-Foundations/Dominator/sub/scanner" 36 "github.com/Cloud-Foundations/tricorder/go/tricorder" 37 "github.com/Cloud-Foundations/tricorder/go/tricorder/units" 38 ) 39 40 var ( 41 configDirectory = flag.String("configDirectory", "/etc/subd/conf.d", 42 "Directory of optional JSON configuration files") 43 defaultCpuPercent = flag.Uint("defaultCpuPercent", 0, 44 "CPU speed as percentage of capacity (default 50)") 45 defaultNetworkSpeedPercent = flag.Uint("defaultNetworkSpeedPercent", 0, 46 "Network speed as percentage of capacity (default 10)") 47 defaultScanSpeedPercent = flag.Uint("defaultScanSpeedPercent", 0, 48 "Scan speed as percentage of capacity (default 2)") 49 disruptionManager = flag.String("disruptionManager", "", 50 "Path to DisruptionManager tool") 51 maxThreads = flag.Uint("maxThreads", 1, 52 "Maximum number of parallel OS threads to use") 53 noteGenerator = flag.String("noteGenerator", "", 54 "Optional command to run (usually after succesful update) to generate a short note") 55 permitInsecureMode = flag.Bool("permitInsecureMode", false, 56 "If true, run in insecure mode. This gives remote root access to all") 57 pidfile = flag.String("pidfile", "/var/run/subd.pid", 58 "Name of file to write my PID to") 59 portNum = flag.Uint("portNum", constants.SubPortNumber, 60 "Port number to allocate and listen on for HTTP/RPC") 61 rootDeviceBytesPerSecond flagutil.Size 62 rootDir = flag.String("rootDir", "/", 63 "Name of root of directory tree to manage") 64 scanExcludeList flagutil.StringList 65 showStats = flag.Bool("showStats", false, 66 "If true, show statistics after each cycle") 67 subdDir = flag.String("subdDir", ".subd", 68 "Name of subd private directory, relative to rootDir. This must be on the same file-system as rootDir") 69 testExternallyPatchable = flag.Bool("testExternallyPatchable", false, 70 "If true, test if externally patchable and exit=0 if so or exit=1 if not") 71 ) 72 73 func init() { 74 // Ensure the main goroutine runs on the startup thread. 75 runtime.LockOSThread() 76 flag.Var(&rootDeviceBytesPerSecond, "rootDeviceBytesPerSecond", 77 "Fallback root device speed (default 0)") 78 flag.Var(&scanExcludeList, "scanExcludeList", 79 `Comma separated list of patterns to exclude from scanning (default `+strings.Join(constants.ScanExcludeList, ",")+`")`) 80 } 81 82 func sanityCheck() bool { 83 r_devnum, err := fsbench.GetDevnumForFile(*rootDir) 84 if err != nil { 85 fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n", 86 *rootDir, err) 87 return false 88 } 89 subdDirPathname := path.Join(*rootDir, *subdDir) 90 s_devnum, err := fsbench.GetDevnumForFile(subdDirPathname) 91 if err != nil { 92 fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n", 93 subdDirPathname, err) 94 return false 95 } 96 if r_devnum != s_devnum { 97 fmt.Fprintf(os.Stderr, 98 "rootDir and subdDir must be on the same file-system\n") 99 return false 100 } 101 return true 102 } 103 104 func createDirectory(dirname string) bool { 105 if err := os.MkdirAll(dirname, 0750); err != nil { 106 fmt.Fprintf(os.Stderr, "Unable to create directory: %s: %s\n", 107 dirname, err) 108 return false 109 } 110 return true 111 } 112 113 func mountTmpfs(dirname string) bool { 114 var statfs syscall.Statfs_t 115 if err := syscall.Statfs(dirname, &statfs); err != nil { 116 fmt.Fprintf(os.Stderr, "Unable to create Statfs: %s: %s\n", 117 dirname, err) 118 return false 119 } 120 if statfs.Type != 0x01021994 { 121 err := wsyscall.Mount("none", dirname, "tmpfs", 0, 122 "size=65536,mode=0750") 123 if err == nil { 124 fmt.Printf("Mounted tmpfs on: %s\n", dirname) 125 } else { 126 fmt.Fprintf(os.Stderr, "Unable to mount tmpfs on: %s: %s\n", 127 dirname, err) 128 return false 129 } 130 } 131 return true 132 } 133 134 func unshareAndBind(workingRootDir string) error { 135 if err := wsyscall.UnshareMountNamespace(); err != nil { 136 return fmt.Errorf("unable to unshare mount namesace: %s\n", err) 137 } 138 syscall.Unmount(workingRootDir, 0) 139 err := wsyscall.Mount(*rootDir, workingRootDir, "", wsyscall.MS_BIND, "") 140 if err != nil { 141 return fmt.Errorf("unable to bind mount %s to %s: %s\n", 142 *rootDir, workingRootDir, err) 143 } 144 return nil 145 } 146 147 func getCachedFsSpeed(workingRootDir string, 148 cacheDirname string) (bytesPerSecond, blocksPerSecond uint64, 149 computed, ok bool) { 150 bytesPerSecond = 0 151 blocksPerSecond = 0 152 devnum, err := fsbench.GetDevnumForFile(workingRootDir) 153 if err != nil { 154 fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n", 155 workingRootDir, err) 156 return 0, 0, false, false 157 } 158 fsbenchDir := path.Join(cacheDirname, "fsbench") 159 if !createDirectory(fsbenchDir) { 160 return 0, 0, false, false 161 } 162 cacheFilename := path.Join(fsbenchDir, strconv.FormatUint(devnum, 16)) 163 file, err := os.Open(cacheFilename) 164 if err == nil { 165 n, err := fmt.Fscanf(file, "%d %d", &bytesPerSecond, &blocksPerSecond) 166 file.Close() 167 if n == 2 || err == nil { 168 return bytesPerSecond, blocksPerSecond, false, true 169 } 170 } 171 bytesPerSecond, blocksPerSecond, err = fsbench.GetReadSpeed(workingRootDir) 172 if err != nil { 173 fmt.Fprintf(os.Stderr, "Unable to measure read speed: %s\n", err) 174 return 0, 0, true, false 175 } 176 file, err = os.Create(cacheFilename) 177 if err != nil { 178 fmt.Fprintf(os.Stderr, "Unable to open: %s for write: %s\n", 179 cacheFilename, err) 180 return 0, 0, true, false 181 } 182 fmt.Fprintf(file, "%d %d\n", bytesPerSecond, blocksPerSecond) 183 file.Close() 184 return bytesPerSecond, blocksPerSecond, true, true 185 } 186 187 func publishFsSpeed(bytesPerSecond, blocksPerSecond uint64) { 188 tricorder.RegisterMetric("/root-read-speed", &bytesPerSecond, 189 units.BytePerSecond, "read speed of root file-system media") 190 tricorder.RegisterMetric("/root-block-read-speed", &blocksPerSecond, 191 units.None, "read speed of root file-system media in blocks/second") 192 } 193 194 func getCachedNetworkSpeed(cacheFilename string) uint64 { 195 if speed, ok := netspeed.GetSpeedToHost(""); ok { 196 return speed 197 } 198 file, err := os.Open(cacheFilename) 199 if err != nil { 200 return 0 201 } 202 defer file.Close() 203 var bytesPerSecond uint64 204 n, err := fmt.Fscanf(file, "%d", &bytesPerSecond) 205 if n == 1 || err == nil { 206 return bytesPerSecond 207 } 208 return 0 209 } 210 211 type DumpableFileSystemHistory struct { 212 fsh *scanner.FileSystemHistory 213 } 214 215 func (fsh *DumpableFileSystemHistory) WriteHtml(writer io.Writer) { 216 fs := fsh.fsh.FileSystem() 217 if fs == nil { 218 return 219 } 220 fmt.Fprintln(writer, "<pre>") 221 fs.List(writer) 222 fmt.Fprintln(writer, "</pre>") 223 } 224 225 func gracefulCleanup() { 226 os.Remove(*pidfile) 227 os.Exit(1) 228 } 229 230 func writePidfile() { 231 file, err := os.Create(*pidfile) 232 if err != nil { 233 fmt.Fprintln(os.Stderr, err.Error()) 234 os.Exit(1) 235 } 236 defer file.Close() 237 fmt.Fprintln(file, os.Getpid()) 238 } 239 240 func main() { 241 // Ensure the startup thread is reserved for the main function. 242 runtime.LockOSThread() 243 if err := loadflags.LoadForDaemon("subd"); err != nil { 244 fmt.Fprintln(os.Stderr, err) 245 os.Exit(1) 246 } 247 flag.Parse() 248 if *testExternallyPatchable { 249 runTestAndExit(checkExternallyPatchable) 250 } 251 if err := wsyscall.SetMyPriority(1); err != nil { 252 fmt.Fprintln(os.Stderr, err) 253 os.Exit(1) 254 } 255 tricorder.RegisterFlags() 256 subdDirPathname := path.Join(*rootDir, *subdDir) 257 workingRootDir := path.Join(subdDirPathname, "root") 258 objectsDir := path.Join(workingRootDir, *subdDir, "objects") 259 tmpDir := path.Join(subdDirPathname, "tmp") 260 netbenchFilename := path.Join(subdDirPathname, "netbench") 261 oldTriggersFilename := path.Join(subdDirPathname, "triggers.previous") 262 if !createDirectory(workingRootDir) { 263 os.Exit(1) 264 } 265 if !sanityCheck() { 266 os.Exit(1) 267 } 268 if !createDirectory(tmpDir) { 269 os.Exit(1) 270 } 271 if !mountTmpfs(tmpDir) { 272 os.Exit(1) 273 } 274 // Create a goroutine for performing updates. 275 workdirGoroutine := goroutine.New() 276 var err error 277 workdirGoroutine.Run(func() { err = unshareAndBind(workingRootDir) }) 278 if err != nil { 279 fmt.Fprintln(os.Stderr, err) 280 os.Exit(1) 281 } 282 runtime.GOMAXPROCS(int(*maxThreads)) 283 logger := serverlogger.New("") 284 srpc.SetDefaultLogger(logger) 285 params := setupserver.Params{Logger: logger} 286 if err := setupserver.SetupTlsWithParams(params); err != nil { 287 if *permitInsecureMode { 288 logger.Println(err) 289 } else { 290 logger.Fatalln(err) 291 } 292 } 293 bytesPerSecond, blocksPerSecond, firstScan, ok := getCachedFsSpeed( 294 workingRootDir, tmpDir) 295 if !ok { 296 if rootDeviceBytesPerSecond < 1<<20 { 297 os.Exit(1) 298 } 299 bytesPerSecond = uint64(rootDeviceBytesPerSecond) 300 blocksPerSecond = bytesPerSecond >> 9 301 logger.Printf("Falling back to -rootDeviceBytesPerSecond option: %s\n", 302 format.FormatBytes(bytesPerSecond)) 303 } 304 publishFsSpeed(bytesPerSecond, blocksPerSecond) 305 configParams := sub.Configuration{} 306 loadConfiguration(*configDirectory, &configParams, logger) 307 // Command-line flags override file configuration. 308 if *defaultCpuPercent > 0 { 309 configParams.CpuPercent = *defaultCpuPercent 310 } 311 if *defaultNetworkSpeedPercent > 0 { 312 configParams.NetworkSpeedPercent = *defaultNetworkSpeedPercent 313 } 314 if *defaultScanSpeedPercent > 0 { 315 configParams.ScanSpeedPercent = *defaultScanSpeedPercent 316 } 317 var configuration scanner.Configuration 318 configuration.CpuLimiter = cpulimiter.New(100) 319 configuration.DefaultCpuPercent = configParams.CpuPercent 320 // Apply built-in defaults if nothing specified. 321 if configuration.DefaultCpuPercent < 1 { 322 configuration.DefaultCpuPercent = constants.DefaultCpuPercent 323 go adjustVcpuLimit(&configuration.DefaultCpuPercent, logger) 324 } 325 if configParams.NetworkSpeedPercent < 1 { 326 configParams.NetworkSpeedPercent = constants.DefaultNetworkSpeedPercent 327 } 328 if configParams.ScanSpeedPercent < 1 { 329 configParams.ScanSpeedPercent = constants.DefaultScanSpeedPercent 330 } 331 filterLines := configParams.ScanExclusionList 332 if len(scanExcludeList) > 0 { 333 filterLines = scanExcludeList 334 } 335 if len(filterLines) < 1 { 336 filterLines = constants.ScanExcludeList 337 } 338 configuration.ScanFilter, err = filter.New(filterLines) 339 if err != nil { 340 fmt.Fprintf(os.Stderr, "Unable to set initial scan exclusions: %s\n", 341 err) 342 os.Exit(1) 343 } 344 configuration.FsScanContext = fsrateio.NewReaderContext(bytesPerSecond, 345 blocksPerSecond, uint64(configParams.ScanSpeedPercent)) 346 defaultSpeed := configuration.FsScanContext.GetContext().SpeedPercent() 347 if firstScan { 348 configuration.FsScanContext.GetContext().SetSpeedPercent(100) 349 } 350 if *showStats { 351 fmt.Println(configuration.FsScanContext) 352 } 353 var fsh scanner.FileSystemHistory 354 mainFunc := func(fsChannel <-chan *scanner.FileSystem, 355 disableScanner func(disableScanner bool)) { 356 networkReaderContext := rateio.NewReaderContext( 357 getCachedNetworkSpeed(netbenchFilename), 358 uint64(configParams.NetworkSpeedPercent), &rateio.ReadMeasurer{}) 359 configuration.NetworkReaderContext = networkReaderContext 360 invalidateNextScanObjectCache := false 361 rescanFunc := func() { 362 invalidateNextScanObjectCache = true 363 if err := fsh.UpdateObjectCacheOnly(); err != nil { 364 logger.Printf("Error updating object cache: %s\n", err) 365 } 366 } 367 rpcdHtmlWriter := rpcd.Setup( 368 rpcd.Config{ 369 DisruptionManager: *disruptionManager, 370 NetworkBenchmarkFilename: netbenchFilename, 371 NoteGeneratorCommand: *noteGenerator, 372 ObjectsDirectoryName: objectsDir, 373 OldTriggersFilename: oldTriggersFilename, 374 RootDirectoryName: workingRootDir, 375 SubConfiguration: configParams, 376 }, 377 rpcd.Params{ 378 DisableScannerFunction: disableScanner, 379 FileSystemHistory: &fsh, 380 Logger: logger, 381 NetworkReaderContext: networkReaderContext, 382 RescanObjectCacheFunction: rescanFunc, 383 ScannerConfiguration: &configuration, 384 SubdDirectory: subdDirPathname, 385 WorkdirGoroutine: workdirGoroutine, 386 }) 387 configMetricsDir, err := tricorder.RegisterDirectory("/config") 388 if err != nil { 389 fmt.Fprintf(os.Stderr, 390 "Unable to create /config metrics directory: %s\n", 391 err) 392 os.Exit(1) 393 } 394 configuration.RegisterMetrics(configMetricsDir) 395 if err != nil { 396 fmt.Fprintf(os.Stderr, "Unable to create config metrics: %s\n", err) 397 os.Exit(1) 398 } 399 httpd.AddHtmlWriter(rpcdHtmlWriter) 400 httpd.AddHtmlWriter(&fsh) 401 httpd.AddHtmlWriter(&configuration) 402 httpd.AddHtmlWriter(logger) 403 html.RegisterHtmlWriterForPattern("/dumpFileSystem", 404 "Scanned File System", 405 &DumpableFileSystemHistory{&fsh}) 406 if err = httpd.StartServer(*portNum, logger); err != nil { 407 fmt.Fprintf(os.Stderr, "Unable to create http server: %s\n", err) 408 os.Exit(1) 409 } 410 fsh.Update(nil) 411 sighupChannel := make(chan os.Signal, 1) 412 signal.Notify(sighupChannel, syscall.SIGHUP) 413 sigtermChannel := make(chan os.Signal, 1) 414 signal.Notify(sigtermChannel, syscall.SIGTERM, syscall.SIGINT) 415 writePidfile() 416 for iter := 0; true; { 417 select { 418 case <-sighupChannel: 419 logger.Printf("Caught SIGHUP: re-execing with: %v\n", os.Args) 420 logger.Flush() 421 err = syscall.Exec(os.Args[0], os.Args, os.Environ()) 422 if err != nil { 423 logger.Printf("Unable to Exec:%s: %s\n", os.Args[0], err) 424 } 425 case <-sigtermChannel: 426 logger.Printf("Caught SIGTERM: performing graceful cleanup\n") 427 logger.Flush() 428 gracefulCleanup() 429 case fs := <-fsChannel: 430 if *showStats { 431 fmt.Printf("Completed cycle: %d\n", iter) 432 } 433 if invalidateNextScanObjectCache { 434 workdirGoroutine.Run(func() { 435 if err := fs.ScanObjectCache(); err != nil { 436 logger.Printf("Error scanning object cache: %s\n", 437 err) 438 } 439 }) 440 invalidateNextScanObjectCache = false 441 } 442 oldGenerationCount := fsh.GenerationCount() 443 oldScanCount := fsh.ScanCount() 444 fsh.Update(fs) 445 iter++ 446 generationCount := fsh.GenerationCount() 447 scanCount := fsh.ScanCount() 448 if generationCount != oldGenerationCount { 449 logger.Printf("Generation count: %d, scan count: %d\n", 450 generationCount, scanCount) 451 } else if scanCount != oldScanCount { 452 logger.Debugf(0, "Generation count: %d, scan count: %d\n", 453 generationCount, scanCount) 454 } 455 runtime.GC() // An opportune time to take out the garbage. 456 if *showStats { 457 fmt.Print(&fsh) // Use pointer to silence go vet. 458 fmt.Print(fsh.FileSystem()) 459 memstats.WriteMemoryStats(os.Stdout) 460 fmt.Println() 461 } 462 if firstScan { 463 configuration.FsScanContext.GetContext().SetSpeedPercent( 464 defaultSpeed) 465 firstScan = false 466 if *showStats { 467 fmt.Println(configuration.FsScanContext) 468 } 469 } 470 } 471 } 472 } 473 // Create a goroutine prior to mutating the startup thread to ensure that 474 // new goroutines are started from a "clean" thread. 475 mainGoroutine := goroutine.New() 476 // Setup environment for scanning. 477 if err := unshareAndBind(workingRootDir); err != nil { 478 logger.Fatalln(err) 479 } 480 if !createDirectory(objectsDir) { // Must be done after unshareAndBind(). 481 os.Exit(1) 482 } 483 scanner.StartScanning(workingRootDir, objectsDir, &configuration, logger, 484 func(fsChannel <-chan *scanner.FileSystem, 485 disableScanner func(disableScanner bool)) { 486 mainGoroutine.Start(func() { mainFunc(fsChannel, disableScanner) }) 487 }) 488 }