github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/cmd/tidy.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2021-2022 Genome Research Ltd. 3 * 4 * Author: Sendu Bala <sb10@sanger.ac.uk> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 ******************************************************************************/ 25 26 package cmd 27 28 import ( 29 "errors" 30 "fmt" 31 "io/fs" 32 "os" 33 "path/filepath" 34 "syscall" 35 "time" 36 37 "github.com/spf13/cobra" 38 shutil "github.com/termie/go-shutil" 39 ) 40 41 // modeRW are the read-write permission bits for user, group and other. 42 const modeRW = 0666 43 44 const dgutDBsSuffix = "dgut.dbs" 45 const pathSizeDBsSuffix = "pathsize.dbs" 46 const dbsSentinelBasename = ".dbs.updated" 47 48 // options for this cmd. 49 var tidyDir string 50 var tidyDate string 51 52 // tidyCmd represents the tidy command. 53 var tidyCmd = &cobra.Command{ 54 Use: "tidy", 55 Short: "Tidy up multi output.", 56 Long: `Tidy up multi output. 57 58 This is called by 'wrstat multi' after the main work has completed. It moves 59 final output files from the supplied unique working directory to the 60 --final_output directory, then deletes the working direcory. 61 62 multi creates a unique ("multi unique") directory, in which it creates 63 directories named after the basename of the directory of interest 64 ("interest basename"), in which it creates another unique directory ("interest 65 unique"), in which it creates the output files. 66 67 tidy assumes the working directory you give it is the "multi unique" from multi. 68 It probably won't do the right thing if not. 69 70 Final output files are named to include the given --date as follows: 71 [date]_[interest basename].[interest unique].[multi unique].[suffix] 72 73 Where [suffix] is one of 'stats.gz', 'byusergroup.gz', 'bygroup' or 'logs.gz'. 74 75 The base.dirs file directly inside the given "multi unique" directory is named: 76 [date]_[multi unique].basedirs 77 78 It also moves the combine.dgut.db directories to inside a directory named: 79 [date]_[multi unique].dgut.dbs 80 (making them sequentially numbered sub-directories) 81 82 Likewise, it moves the combine.pathsize.db files to inside a directory named: 83 [date]_[multi unique].pathsize.dbs 84 (making them sequentially numbered files) 85 86 Finally, it creates or touches a file named '.dgut.dbs.updated' in the 87 --final_output directory, giving it an mtime matching the oldest mtime of the 88 walk log files. 'wrstat server' will use this file to reload its database and 89 update its knowledge of when the data was captured. 90 91 The output files will be given the same user:group ownership and 92 user,group,other read & write permissions as the --final_output directory. 93 94 Once all output files have been moved, the "multi unique" directory is deleted. 95 96 It is safe to call this multiple times if it was, for example, killed half way 97 through; it won't clobber final outputs already moved.`, 98 Run: func(cmd *cobra.Command, args []string) { 99 if tidyDir == "" { 100 die("--final_output is required") 101 } 102 if len(args) != 1 { 103 die("exactly 1 unique working directory from 'wrstat multi' must be supplied") 104 } 105 106 destDir, err := filepath.Abs(tidyDir) 107 if err != nil { 108 die("could not determine absolute path to --final_output dir: %s", err) 109 } 110 111 err = os.MkdirAll(destDir, userOnlyPerm) 112 if err != nil { 113 die("failed to create --final_output dir [%s]: %s", destDir, err) 114 } 115 116 destDirInfo, err := os.Stat(destDir) 117 if err != nil { 118 die("could not stat the --final_output dir: %s", err) 119 } 120 121 sourceDir, err := filepath.Abs(args[0]) 122 if err != nil { 123 die("could not determine absolute path to source dir: %s", err) 124 } 125 126 err = moveAndDelete(sourceDir, destDir, destDirInfo, tidyDate) 127 if err != nil { 128 die("failed to tidy: %s", err) 129 } 130 }, 131 } 132 133 func init() { 134 RootCmd.AddCommand(tidyCmd) 135 136 // flags specific to this sub-command 137 tidyCmd.Flags().StringVarP(&tidyDir, "final_output", "f", "", "final output directory") 138 tidyCmd.Flags().StringVarP(&tidyDate, "date", "d", "", "datestamp of when 'wrstat multi' was called") 139 } 140 141 // moveAndDelete does the main work of this cmd. 142 func moveAndDelete(sourceDir, destDir string, destDirInfo fs.FileInfo, date string) error { 143 if err := findAndMoveOutputs(sourceDir, destDir, destDirInfo, date, 144 combineStatsOutputFileBasename, "stats.gz"); err != nil { 145 return err 146 } 147 148 if err := findAndMoveOutputs(sourceDir, destDir, destDirInfo, date, 149 combineUserGroupOutputFileBasename, "byusergroup.gz"); err != nil { 150 return err 151 } 152 153 if err := findAndMoveOutputs(sourceDir, destDir, destDirInfo, date, 154 combineGroupOutputFileBasename, "bygroup"); err != nil { 155 return err 156 } 157 158 if err := findAndMoveOutputs(sourceDir, destDir, destDirInfo, date, 159 combineLogOutputFileBasename, "logs.gz"); err != nil { 160 return err 161 } 162 163 if err := moveBaseDirsFile(sourceDir, destDir, destDirInfo, date); err != nil { 164 return err 165 } 166 167 if err := findAndMoveDBs(sourceDir, destDir, destDirInfo, date); err != nil { 168 return err 169 } 170 171 return os.RemoveAll(sourceDir) 172 } 173 174 // findAndMoveOutputs finds output files in the given sourceDir with given 175 // suffix and moves them to destDir, including date in the name, and adjusting 176 // ownership and permissions to match the destDir. 177 func findAndMoveOutputs(sourceDir, destDir string, destDirInfo fs.FileInfo, 178 date, inputSuffix, outputSuffix string) error { 179 outputPaths, err := filepath.Glob(fmt.Sprintf("%s/*/*/%s", sourceDir, inputSuffix)) 180 if err != nil { 181 return err 182 } 183 184 err = moveOutputs(outputPaths, destDir, destDirInfo, date, outputSuffix) 185 if err != nil { 186 return err 187 } 188 189 return nil 190 } 191 192 // moveOutputs calls moveOutput() on each outputPaths source file. 193 func moveOutputs(outputPaths []string, destDir string, destDirInfo fs.FileInfo, date, suffix string) error { 194 for _, path := range outputPaths { 195 err := moveOutput(path, destDir, destDirInfo, date, suffix) 196 if err != nil { 197 return err 198 } 199 } 200 201 return nil 202 } 203 204 // moveOutput moves an output file to the finalDir and changes its name to 205 // the correct format, then adjusts ownership and permissions to match the 206 // destDir. 207 func moveOutput(source string, destDir string, destDirInfo fs.FileInfo, date, suffix string) error { 208 interestUniqueDir := filepath.Dir(source) 209 interestBaseDir := filepath.Dir(interestUniqueDir) 210 multiUniqueDir := filepath.Dir(interestBaseDir) 211 dest := filepath.Join(destDir, fmt.Sprintf("%s_%s.%s.%s.%s", 212 date, 213 filepath.Base(interestBaseDir), 214 filepath.Base(interestUniqueDir), 215 filepath.Base(multiUniqueDir), 216 suffix)) 217 218 return renameAndMatchPerms(source, dest, destDirInfo) 219 } 220 221 // renameAndMatchPerms tries 2 ways to rename the file (resorting to a copy if 222 // this is across filesystem boundaries), then matches the dest file permissions 223 // to the given FileInfo. 224 // 225 // If source doesn't exist, but dest does, assumes the rename was done 226 // previously and just tries to match the permissions. 227 func renameAndMatchPerms(source, dest string, destDirInfo fs.FileInfo) error { 228 if _, err := os.Stat(source); errors.Is(err, os.ErrNotExist) { 229 if _, err = os.Stat(dest); err == nil { 230 return matchPerms(dest, destDirInfo) 231 } 232 } 233 234 err := os.Rename(source, dest) 235 if err != nil { 236 if err = shutil.CopyFile(source, dest, false); err != nil { 237 return err 238 } 239 } 240 241 return matchPerms(dest, destDirInfo) 242 } 243 244 // matchPerms ensures that the given file has the same ownership and read-write 245 // permissions as the given fileinfo. 246 func matchPerms(path string, desired fs.FileInfo) error { 247 current, err := os.Stat(path) 248 if err != nil { 249 return err 250 } 251 252 if err = matchOwnership(path, current, desired); err != nil { 253 return err 254 } 255 256 return matchReadWrite(path, current, desired) 257 } 258 259 // matchOwnership ensures that the given file with the current fileinfo has the 260 // same user and group ownership as the desired fileinfo. 261 func matchOwnership(path string, current, desired fs.FileInfo) error { 262 uid, gid := getUIDAndGID(current) 263 desiredUID, desiredGID := getUIDAndGID(desired) 264 265 if uid == desiredUID && gid == desiredGID { 266 return nil 267 } 268 269 return os.Lchown(path, desiredUID, desiredGID) 270 } 271 272 // getUIDAndGID extracts the UID and GID from a FileInfo. NB: this will only 273 // work on linux. 274 func getUIDAndGID(info fs.FileInfo) (int, int) { 275 return int(info.Sys().(*syscall.Stat_t).Uid), int(info.Sys().(*syscall.Stat_t).Gid) //nolint:forcetypeassert 276 } 277 278 // matchReadWrite ensures that the given file with the current fileinfo has the 279 // same user,group,other read&write permissions as the desired fileinfo. 280 func matchReadWrite(path string, current, desired fs.FileInfo) error { 281 currentMode := current.Mode() 282 currentRW := currentMode & modeRW 283 desiredRW := desired.Mode() & modeRW 284 285 if currentRW == desiredRW { 286 return nil 287 } 288 289 return os.Chmod(path, currentMode|desiredRW) 290 } 291 292 // moveBaseDirsFile moves the base.dirs file in sourceDir to a uniquely named 293 // .basedirs file in destDir that includes the given date. 294 func moveBaseDirsFile(sourceDir, destDir string, destDirInfo fs.FileInfo, date string) error { 295 source := filepath.Join(sourceDir, basedirBasename) 296 297 dest := filepath.Join(destDir, fmt.Sprintf("%s_%s.basedirs", 298 date, 299 filepath.Base(sourceDir))) 300 301 return renameAndMatchPerms(source, dest, destDirInfo) 302 } 303 304 // findAndMoveDBs finds the combine.dgut.db directories in the given sourceDir 305 // and moves them to a uniquely named dir in destDir that includes the given 306 // date, and adjusts ownership and permissions to match the destDir. 307 // 308 // Likewise, it finds the combine.pathsize.db files in the given sourceDir and 309 // moves them to a similarly named dir in destDir, with ownership and perms 310 // matched. 311 // 312 // It also touches a file that 'wrstat server' monitors to know when to reload 313 // its database files. It gives that file an mtime corresponding to the oldest 314 // mtime of the walk log files. 315 func findAndMoveDBs(sourceDir, destDir string, destDirInfo fs.FileInfo, date string) error { 316 err := findAndMoveSpecificDBs(combineDGUTOutputFileBasename, dgutDBsSuffix, 317 sourceDir, destDir, destDirInfo, date) 318 if err != nil { 319 return err 320 } 321 322 err = findAndMoveSpecificDBs(combinePathSizeOutputFileBasename, pathSizeDBsSuffix, 323 sourceDir, destDir, destDirInfo, date) 324 if err != nil { 325 return err 326 } 327 328 return touchDBUpdatedFile(sourceDir, destDir, destDirInfo) 329 } 330 331 func findAndMoveSpecificDBs(sourceBase, destSuffix, 332 sourceDir, destDir string, destDirInfo fs.FileInfo, date string) error { 333 sources, errg := filepath.Glob(fmt.Sprintf("%s/*/*/%s", sourceDir, sourceBase)) 334 if errg != nil { 335 return errg 336 } 337 338 dbsDir, err := makeDBsDir(sourceDir, destDir, destSuffix, destDirInfo, date) 339 if err != nil { 340 return err 341 } 342 343 for i, source := range sources { 344 if _, err = os.Stat(source); err != nil { 345 return err 346 } 347 348 dest := filepath.Join(dbsDir, fmt.Sprintf("%d", i)) 349 350 err = renameAndMatchPerms(source, dest, destDirInfo) 351 if err != nil { 352 return err 353 } 354 } 355 356 return matchPermsInsideDir(dbsDir, destDirInfo) 357 } 358 359 // makeDBsDir makes a uniquely named directory featuring the given date to hold 360 // database files in destDir. If it already exists, does nothing. Returns the 361 // path to the database directory and any error. 362 func makeDBsDir(sourceDir, destDir, suffix string, destDirInfo fs.FileInfo, date string) (string, error) { 363 dbsDir := filepath.Join(destDir, fmt.Sprintf("%s_%s.%s", 364 date, 365 filepath.Base(sourceDir), 366 suffix, 367 )) 368 369 err := os.Mkdir(dbsDir, destDirInfo.Mode().Perm()) 370 if os.IsExist(err) { 371 err = nil 372 } 373 374 return dbsDir, err 375 } 376 377 // matchPermsInsideDir does matchPerms for all the files in the given dir 378 // recursively. 379 func matchPermsInsideDir(dir string, desired fs.FileInfo) error { 380 return filepath.WalkDir(dir, func(path string, de fs.DirEntry, err error) error { 381 if err != nil { 382 return err 383 } 384 385 return matchPerms(path, desired) 386 }) 387 } 388 389 // touchDBUpdatedFile touches a file that the server monitors so that it knows 390 // to try and reload the databases. Matches the permissions of the touched file 391 // to the given permissions. Gives the file an mtime corresponding to the oldest 392 // mtime of walk log files. 393 func touchDBUpdatedFile(sourceDir, destDir string, desired fs.FileInfo) error { 394 sentinel := filepath.Join(destDir, dbsSentinelBasename) 395 396 oldest, err := getOldestMtimeOfWalkFiles(sourceDir) 397 if err != nil { 398 return err 399 } 400 401 _, err = os.Stat(sentinel) 402 if os.IsNotExist(err) { 403 if err = createFile(sentinel); err != nil { 404 return err 405 } 406 } 407 408 if err = touchFile(sentinel, oldest); err != nil { 409 return err 410 } 411 412 return matchPerms(sentinel, desired) 413 } 414 415 // createFile creates the given path. 416 func createFile(path string) error { 417 file, err := os.Create(path) 418 if err != nil { 419 return err 420 } 421 422 file.Close() 423 424 return nil 425 } 426 427 // touchFile updates the a&mtime of the given path to the given time. 428 func touchFile(path string, t time.Time) error { 429 return os.Chtimes(path, t.Local(), t.Local()) 430 } 431 432 // getOldestMtimeOfWalkFiles looks in sourceDir for walk log files and returns 433 // their oldest mtime. 434 func getOldestMtimeOfWalkFiles(dir string) (time.Time, error) { 435 paths, err := filepath.Glob(fmt.Sprintf("%s/*/*/*%s", dir, statLogOutputFileSuffix)) 436 if err != nil || len(paths) == 0 { 437 die("failed to find walk log files based on [%s/*/*/*%s] (err: %s)", dir, statLogOutputFileSuffix, err) 438 } 439 440 oldestT := time.Now() 441 442 for _, path := range paths { 443 info, err := os.Stat(path) 444 if err != nil { 445 return time.Time{}, err 446 } 447 448 if info.ModTime().Before(oldestT) { 449 oldestT = info.ModTime() 450 } 451 } 452 453 return oldestT, nil 454 }