github.com/cloud-foundations/dominator@v0.0.0-20221004181915-6e4fee580046/cmd/subd/main.go (about) 1 package main 2 3 import ( 4 "flag" 5 "fmt" 6 "io" 7 "os" 8 "os/signal" 9 "path" 10 "runtime" 11 "strconv" 12 "strings" 13 "syscall" 14 15 "github.com/Cloud-Foundations/Dominator/lib/constants" 16 "github.com/Cloud-Foundations/Dominator/lib/cpulimiter" 17 "github.com/Cloud-Foundations/Dominator/lib/filter" 18 "github.com/Cloud-Foundations/Dominator/lib/flags/loadflags" 19 "github.com/Cloud-Foundations/Dominator/lib/flagutil" 20 "github.com/Cloud-Foundations/Dominator/lib/format" 21 "github.com/Cloud-Foundations/Dominator/lib/fsbench" 22 "github.com/Cloud-Foundations/Dominator/lib/fsrateio" 23 "github.com/Cloud-Foundations/Dominator/lib/html" 24 "github.com/Cloud-Foundations/Dominator/lib/log/serverlogger" 25 "github.com/Cloud-Foundations/Dominator/lib/memstats" 26 "github.com/Cloud-Foundations/Dominator/lib/netspeed" 27 "github.com/Cloud-Foundations/Dominator/lib/rateio" 28 "github.com/Cloud-Foundations/Dominator/lib/srpc/setupserver" 29 "github.com/Cloud-Foundations/Dominator/lib/wsyscall" 30 "github.com/Cloud-Foundations/Dominator/proto/sub" 31 "github.com/Cloud-Foundations/Dominator/sub/httpd" 32 "github.com/Cloud-Foundations/Dominator/sub/rpcd" 33 "github.com/Cloud-Foundations/Dominator/sub/scanner" 34 "github.com/Cloud-Foundations/tricorder/go/tricorder" 35 "github.com/Cloud-Foundations/tricorder/go/tricorder/units" 36 ) 37 38 var ( 39 configDirectory = flag.String("configDirectory", "/etc/subd/conf.d", 40 "Directory of optional JSON configuration files") 41 defaultCpuPercent = flag.Uint("defaultCpuPercent", 0, 42 "CPU speed as percentage of capacity (default 50)") 43 defaultNetworkSpeedPercent = flag.Uint("defaultNetworkSpeedPercent", 0, 44 "Network speed as percentage of capacity (default 10)") 45 defaultScanSpeedPercent = flag.Uint("defaultScanSpeedPercent", 0, 46 "Scan speed as percentage of capacity (default 2)") 47 maxThreads = flag.Uint("maxThreads", 1, 48 "Maximum number of parallel OS threads to use") 49 permitInsecureMode = flag.Bool("permitInsecureMode", false, 50 "If true, run in insecure mode. This gives remote root access to all") 51 pidfile = flag.String("pidfile", "/var/run/subd.pid", 52 "Name of file to write my PID to") 53 portNum = flag.Uint("portNum", constants.SubPortNumber, 54 "Port number to allocate and listen on for HTTP/RPC") 55 rootDeviceBytesPerSecond flagutil.Size 56 rootDir = flag.String("rootDir", "/", 57 "Name of root of directory tree to manage") 58 scanExcludeList flagutil.StringList 59 showStats = flag.Bool("showStats", false, 60 "If true, show statistics after each cycle") 61 subdDir = flag.String("subdDir", ".subd", 62 "Name of subd private directory, relative to rootDir. This must be on the same file-system as rootDir") 63 testExternallyPatchable = flag.Bool("testExternallyPatchable", false, 64 "If true, test if externally patchable and exit=0 if so or exit=1 if not") 65 unshare = flag.Bool("unshare", true, "Internal use only.") 66 ) 67 68 func init() { 69 runtime.LockOSThread() 70 flag.Var(&rootDeviceBytesPerSecond, "rootDeviceBytesPerSecond", 71 "Fallback root device speed (default 0)") 72 flag.Var(&scanExcludeList, "scanExcludeList", 73 `Comma separated list of patterns to exclude from scanning (default `+strings.Join(constants.ScanExcludeList, ",")+`")`) 74 } 75 76 func sanityCheck() bool { 77 r_devnum, err := fsbench.GetDevnumForFile(*rootDir) 78 if err != nil { 79 fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n", 80 *rootDir, err) 81 return false 82 } 83 subdDirPathname := path.Join(*rootDir, *subdDir) 84 s_devnum, err := fsbench.GetDevnumForFile(subdDirPathname) 85 if err != nil { 86 fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n", 87 subdDirPathname, err) 88 return false 89 } 90 if r_devnum != s_devnum { 91 fmt.Fprintf(os.Stderr, 92 "rootDir and subdDir must be on the same file-system\n") 93 return false 94 } 95 return true 96 } 97 98 func createDirectory(dirname string) bool { 99 if err := os.MkdirAll(dirname, 0750); err != nil { 100 fmt.Fprintf(os.Stderr, "Unable to create directory: %s: %s\n", 101 dirname, err) 102 return false 103 } 104 return true 105 } 106 107 func mountTmpfs(dirname string) bool { 108 var statfs syscall.Statfs_t 109 if err := syscall.Statfs(dirname, &statfs); err != nil { 110 fmt.Fprintf(os.Stderr, "Unable to create Statfs: %s: %s\n", 111 dirname, err) 112 return false 113 } 114 if statfs.Type != 0x01021994 { 115 err := wsyscall.Mount("none", dirname, "tmpfs", 0, 116 "size=65536,mode=0750") 117 if err == nil { 118 fmt.Printf("Mounted tmpfs on: %s\n", dirname) 119 } else { 120 fmt.Fprintf(os.Stderr, "Unable to mount tmpfs on: %s: %s\n", 121 dirname, err) 122 return false 123 } 124 } 125 return true 126 } 127 128 func unshareAndBind(workingRootDir string) bool { 129 if *unshare { 130 // Re-exec myself using the unshare syscall while on a locked thread. 131 // This hack is required because syscall.Unshare() operates on only one 132 // thread in the process, and Go switches execution between threads 133 // randomly. Thus, the namespace can be suddenly switched for running 134 // code. This is an aspect of Go that was not well thought out. 135 runtime.LockOSThread() 136 if err := wsyscall.UnshareMountNamespace(); err != nil { 137 fmt.Fprintf(os.Stderr, "Unable to unshare mount namesace: %s\n", 138 err) 139 return false 140 } 141 // Ensure the process is slightly niced. Since the Linux implementation 142 // of setpriority(2) only applies to a thread, not the whole process 143 // (contrary to the POSIX specification), do this in the pinned OS 144 // thread so that the whole process (after exec) will be niced. 145 syscall.Setpriority(syscall.PRIO_PROCESS, 0, 1) 146 args := append(os.Args, "-unshare=false") 147 if err := syscall.Exec(args[0], args, os.Environ()); err != nil { 148 fmt.Fprintf(os.Stderr, "Unable to Exec:%s: %s\n", args[0], err) 149 return false 150 } 151 } 152 syscall.Unmount(workingRootDir, 0) 153 err := wsyscall.Mount(*rootDir, workingRootDir, "", wsyscall.MS_BIND, "") 154 if err != nil { 155 fmt.Fprintf(os.Stderr, "Unable to bind mount %s to %s: %s\n", 156 *rootDir, workingRootDir, err) 157 return false 158 } 159 // Clean up -unshare=false so that a subsequent re-exec starts from scratch. 160 args := make([]string, 0, len(os.Args)-1) 161 for _, arg := range os.Args { 162 if arg != "-unshare=false" { 163 args = append(args, arg) 164 } 165 } 166 os.Args = args 167 return true 168 } 169 170 func getCachedFsSpeed(workingRootDir string, 171 cacheDirname string) (bytesPerSecond, blocksPerSecond uint64, 172 computed, ok bool) { 173 bytesPerSecond = 0 174 blocksPerSecond = 0 175 devnum, err := fsbench.GetDevnumForFile(workingRootDir) 176 if err != nil { 177 fmt.Fprintf(os.Stderr, "Unable to get device number for: %s: %s\n", 178 workingRootDir, err) 179 return 0, 0, false, false 180 } 181 fsbenchDir := path.Join(cacheDirname, "fsbench") 182 if !createDirectory(fsbenchDir) { 183 return 0, 0, false, false 184 } 185 cacheFilename := path.Join(fsbenchDir, strconv.FormatUint(devnum, 16)) 186 file, err := os.Open(cacheFilename) 187 if err == nil { 188 n, err := fmt.Fscanf(file, "%d %d", &bytesPerSecond, &blocksPerSecond) 189 file.Close() 190 if n == 2 || err == nil { 191 return bytesPerSecond, blocksPerSecond, false, true 192 } 193 } 194 bytesPerSecond, blocksPerSecond, err = fsbench.GetReadSpeed(workingRootDir) 195 if err != nil { 196 fmt.Fprintf(os.Stderr, "Unable to measure read speed: %s\n", err) 197 return 0, 0, true, false 198 } 199 file, err = os.Create(cacheFilename) 200 if err != nil { 201 fmt.Fprintf(os.Stderr, "Unable to open: %s for write: %s\n", 202 cacheFilename, err) 203 return 0, 0, true, false 204 } 205 fmt.Fprintf(file, "%d %d\n", bytesPerSecond, blocksPerSecond) 206 file.Close() 207 return bytesPerSecond, blocksPerSecond, true, true 208 } 209 210 func publishFsSpeed(bytesPerSecond, blocksPerSecond uint64) { 211 tricorder.RegisterMetric("/root-read-speed", &bytesPerSecond, 212 units.BytePerSecond, "read speed of root file-system media") 213 tricorder.RegisterMetric("/root-block-read-speed", &blocksPerSecond, 214 units.None, "read speed of root file-system media in blocks/second") 215 } 216 217 func getCachedNetworkSpeed(cacheFilename string) uint64 { 218 if speed, ok := netspeed.GetSpeedToHost(""); ok { 219 return speed 220 } 221 file, err := os.Open(cacheFilename) 222 if err != nil { 223 return 0 224 } 225 defer file.Close() 226 var bytesPerSecond uint64 227 n, err := fmt.Fscanf(file, "%d", &bytesPerSecond) 228 if n == 1 || err == nil { 229 return bytesPerSecond 230 } 231 return 0 232 } 233 234 type DumpableFileSystemHistory struct { 235 fsh *scanner.FileSystemHistory 236 } 237 238 func (fsh *DumpableFileSystemHistory) WriteHtml(writer io.Writer) { 239 fs := fsh.fsh.FileSystem() 240 if fs == nil { 241 return 242 } 243 fmt.Fprintln(writer, "<pre>") 244 fs.List(writer) 245 fmt.Fprintln(writer, "</pre>") 246 } 247 248 func gracefulCleanup() { 249 os.Remove(*pidfile) 250 os.Exit(1) 251 } 252 253 func writePidfile() { 254 file, err := os.Create(*pidfile) 255 if err != nil { 256 fmt.Fprintln(os.Stderr, err.Error()) 257 os.Exit(1) 258 } 259 defer file.Close() 260 fmt.Fprintln(file, os.Getpid()) 261 } 262 263 func main() { 264 if err := loadflags.LoadForDaemon("subd"); err != nil { 265 fmt.Fprintln(os.Stderr, err) 266 os.Exit(1) 267 } 268 flag.Parse() 269 if *testExternallyPatchable { 270 runTestAndExit(checkExternallyPatchable) 271 } 272 tricorder.RegisterFlags() 273 subdDirPathname := path.Join(*rootDir, *subdDir) 274 workingRootDir := path.Join(subdDirPathname, "root") 275 objectsDir := path.Join(workingRootDir, *subdDir, "objects") 276 tmpDir := path.Join(subdDirPathname, "tmp") 277 netbenchFilename := path.Join(subdDirPathname, "netbench") 278 oldTriggersFilename := path.Join(subdDirPathname, "triggers.previous") 279 if !createDirectory(workingRootDir) { 280 os.Exit(1) 281 } 282 if !sanityCheck() { 283 os.Exit(1) 284 } 285 if !createDirectory(tmpDir) { 286 os.Exit(1) 287 } 288 if !mountTmpfs(tmpDir) { 289 os.Exit(1) 290 } 291 if !unshareAndBind(workingRootDir) { 292 os.Exit(1) 293 } 294 if !createDirectory(objectsDir) { 295 os.Exit(1) 296 } 297 runtime.GOMAXPROCS(int(*maxThreads)) 298 logger := serverlogger.New("") 299 if err := setupserver.SetupTls(); err != nil { 300 if *permitInsecureMode { 301 logger.Println(err) 302 } else { 303 logger.Fatalln(err) 304 } 305 } 306 bytesPerSecond, blocksPerSecond, firstScan, ok := getCachedFsSpeed( 307 workingRootDir, tmpDir) 308 if !ok { 309 if rootDeviceBytesPerSecond < 1<<20 { 310 os.Exit(1) 311 } 312 bytesPerSecond = uint64(rootDeviceBytesPerSecond) 313 blocksPerSecond = bytesPerSecond >> 9 314 logger.Printf("Falling back to -rootDeviceBytesPerSecond option: %s\n", 315 format.FormatBytes(bytesPerSecond)) 316 } 317 publishFsSpeed(bytesPerSecond, blocksPerSecond) 318 configParams := sub.Configuration{} 319 loadConfiguration(*configDirectory, &configParams, logger) 320 // Command-line flags override file configuration. 321 if *defaultCpuPercent > 0 { 322 configParams.CpuPercent = *defaultCpuPercent 323 } 324 if *defaultNetworkSpeedPercent > 0 { 325 configParams.NetworkSpeedPercent = *defaultNetworkSpeedPercent 326 } 327 if *defaultScanSpeedPercent > 0 { 328 configParams.ScanSpeedPercent = *defaultScanSpeedPercent 329 } 330 var configuration scanner.Configuration 331 configuration.CpuLimiter = cpulimiter.New(100) 332 configuration.DefaultCpuPercent = configParams.CpuPercent 333 // Apply built-in defaults if nothing specified. 334 if configuration.DefaultCpuPercent < 1 { 335 configuration.DefaultCpuPercent = constants.DefaultCpuPercent 336 go adjustVcpuLimit(&configuration.DefaultCpuPercent, logger) 337 } 338 if configParams.NetworkSpeedPercent < 1 { 339 configParams.NetworkSpeedPercent = constants.DefaultNetworkSpeedPercent 340 } 341 if configParams.ScanSpeedPercent < 1 { 342 configParams.ScanSpeedPercent = constants.DefaultScanSpeedPercent 343 } 344 filterLines := configParams.ScanExclusionList 345 if len(scanExcludeList) > 0 { 346 filterLines = scanExcludeList 347 } 348 if len(filterLines) < 1 { 349 filterLines = constants.ScanExcludeList 350 } 351 var err error 352 configuration.ScanFilter, err = filter.New(filterLines) 353 if err != nil { 354 fmt.Fprintf(os.Stderr, "Unable to set initial scan exclusions: %s\n", 355 err) 356 os.Exit(1) 357 } 358 configuration.FsScanContext = fsrateio.NewReaderContext(bytesPerSecond, 359 blocksPerSecond, uint64(configParams.ScanSpeedPercent)) 360 defaultSpeed := configuration.FsScanContext.GetContext().SpeedPercent() 361 if firstScan { 362 configuration.FsScanContext.GetContext().SetSpeedPercent(100) 363 } 364 if *showStats { 365 fmt.Println(configuration.FsScanContext) 366 } 367 var fsh scanner.FileSystemHistory 368 mainFunc := func(fsChannel <-chan *scanner.FileSystem, 369 disableScanner func(disableScanner bool)) { 370 networkReaderContext := rateio.NewReaderContext( 371 getCachedNetworkSpeed(netbenchFilename), 372 uint64(configParams.NetworkSpeedPercent), &rateio.ReadMeasurer{}) 373 configuration.NetworkReaderContext = networkReaderContext 374 invalidateNextScanObjectCache := false 375 rpcdHtmlWriter := 376 rpcd.Setup(&configuration, &fsh, objectsDir, 377 workingRootDir, networkReaderContext, netbenchFilename, 378 oldTriggersFilename, disableScanner, 379 func() { 380 invalidateNextScanObjectCache = true 381 fsh.UpdateObjectCacheOnly() 382 }, 383 logger) 384 configMetricsDir, err := tricorder.RegisterDirectory("/config") 385 if err != nil { 386 fmt.Fprintf(os.Stderr, 387 "Unable to create /config metrics directory: %s\n", 388 err) 389 os.Exit(1) 390 } 391 configuration.RegisterMetrics(configMetricsDir) 392 if err != nil { 393 fmt.Fprintf(os.Stderr, "Unable to create config metrics: %s\n", err) 394 os.Exit(1) 395 } 396 httpd.AddHtmlWriter(rpcdHtmlWriter) 397 httpd.AddHtmlWriter(&fsh) 398 httpd.AddHtmlWriter(&configuration) 399 httpd.AddHtmlWriter(logger) 400 html.RegisterHtmlWriterForPattern("/dumpFileSystem", 401 "Scanned File System", 402 &DumpableFileSystemHistory{&fsh}) 403 if err = httpd.StartServer(*portNum, logger); err != nil { 404 fmt.Fprintf(os.Stderr, "Unable to create http server: %s\n", err) 405 os.Exit(1) 406 } 407 fsh.Update(nil) 408 sighupChannel := make(chan os.Signal, 1) 409 signal.Notify(sighupChannel, syscall.SIGHUP) 410 sigtermChannel := make(chan os.Signal, 1) 411 signal.Notify(sigtermChannel, syscall.SIGTERM, syscall.SIGINT) 412 writePidfile() 413 for iter := 0; true; { 414 select { 415 case <-sighupChannel: 416 logger.Printf("Caught SIGHUP: re-execing with: %v\n", os.Args) 417 logger.Flush() 418 err = syscall.Exec(os.Args[0], os.Args, os.Environ()) 419 if err != nil { 420 logger.Printf("Unable to Exec:%s: %s\n", os.Args[0], err) 421 } 422 case <-sigtermChannel: 423 logger.Printf("Caught SIGTERM: performing graceful cleanup\n") 424 logger.Flush() 425 gracefulCleanup() 426 case fs := <-fsChannel: 427 if *showStats { 428 fmt.Printf("Completed cycle: %d\n", iter) 429 } 430 if invalidateNextScanObjectCache { 431 fs.ScanObjectCache() 432 invalidateNextScanObjectCache = false 433 } 434 fsh.Update(fs) 435 iter++ 436 runtime.GC() // An opportune time to take out the garbage. 437 if *showStats { 438 fmt.Print(&fsh) // Use pointer to silence go vet. 439 fmt.Print(fsh.FileSystem()) 440 memstats.WriteMemoryStats(os.Stdout) 441 fmt.Println() 442 } 443 if firstScan { 444 configuration.FsScanContext.GetContext().SetSpeedPercent( 445 defaultSpeed) 446 firstScan = false 447 if *showStats { 448 fmt.Println(configuration.FsScanContext) 449 } 450 } 451 } 452 } 453 } 454 scanner.StartScanning(workingRootDir, objectsDir, &configuration, logger, 455 mainFunc) 456 }