github.com/wtsi-ssg/wrstat/v3@v3.2.3/neaten/neaten.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2022 Genome Research Ltd. 3 * 4 * Author: Sendu Bala <sb10@sanger.ac.uk> 5 * Author: Kyle Mace <km34@sanger.ac.uk> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included 16 * in all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 ******************************************************************************/ 26 27 package neaten 28 29 import ( 30 "errors" 31 "fmt" 32 "io/fs" 33 "os" 34 "path/filepath" 35 "syscall" 36 "time" 37 38 "github.com/termie/go-shutil" 39 fileCheck "github.com/wtsi-ssg/wrstat/v3/fs" 40 ) 41 42 type Error string 43 44 func (e Error) Error() string { return string(e) } 45 46 const ErrNoOutputsFound = Error("There are no existing files according to the provided input and output suffixes.") 47 48 // modeRW are the read-write permission bits for user, group and other. 
const modeRW = 0o666

// Tidy describes a tidy-up job: the source directory, the suffixes and glob
// patterns used to find input files in it, and the destination directory the
// files are moved to. Call Up() to carry the job out.
type Tidy struct {
	// SrcDir is the directory holding the files to be tidied.
	SrcDir string

	// DestDir is the directory the files are moved to.
	DestDir string

	// Date is included in the names given to the moved files.
	Date string

	// CombineFileSuffixes maps the suffix of each combine file in SrcDir to
	// the suffix its counterpart gets in DestDir.
	CombineFileSuffixes map[string]string

	// DBFileSuffixes maps the suffix of each db file in SrcDir to the suffix
	// its counterpart gets in DestDir.
	DBFileSuffixes map[string]string

	// BaseFileSuffixes maps the suffix of each base file in SrcDir to the
	// suffix its counterpart gets in DestDir.
	BaseFileSuffixes map[string]string

	// CombineFileGlobPattern is a fmt format string that, given SrcDir and an
	// input suffix, yields the glob pattern matching combine files.
	CombineFileGlobPattern string

	// DBFileGlobPattern is a fmt format string that, given SrcDir and an
	// input suffix, yields the glob pattern matching db files.
	DBFileGlobPattern string

	// WalkFilePathGlobPattern is a fmt format string that, given a directory
	// and a suffix, yields the glob pattern matching walk files.
	WalkFilePathGlobPattern string

	// DestDirPerms are the permissions given to DestDir if we have to create
	// it ourselves.
	DestDirPerms fs.FileMode

	// destDirInfo is the stat result for DestDir, recorded by Up().
	destDirInfo fs.FileInfo
}

// Up takes our source directory of wrstat output files, renames them and
// relocates them to our dest directory, using our date. Also ensures that the
// permissions of wrstat output files match those of dest directory. If our dest
// dir doesn't exist, it will be created. And it touches a file called
// .dgut.db.updated, setting its mTime equal to the oldest of all those from our
// srcDir. Finally, deletes the source directory.
//
// For debugging purposes, set disableDeletion to true to disable deletion of the
// source directory after a successful move.
97 func (t *Tidy) Up(disableDeletion bool) error { 98 if err := fileCheck.DirValid(t.SrcDir); err != nil { 99 return err 100 } 101 102 err := fileCheck.DirValid(t.DestDir) 103 if os.IsNotExist(err) { 104 err = os.MkdirAll(t.DestDir, t.DestDirPerms) 105 if err != nil { 106 return err 107 } 108 } 109 110 t.destDirInfo, err = os.Stat(t.DestDir) 111 if err != nil { 112 return err 113 } 114 115 return t.moveAndDelete(disableDeletion) 116 } 117 118 // moveAndDelete does the main work of this package: move various files to our 119 // destDir, then delete our SrcDir if disableDeletion is false. 120 func (t *Tidy) moveAndDelete(disableDeletion bool) error { 121 if err := t.move(); err != nil { 122 return err 123 } 124 125 if disableDeletion { 126 return nil 127 } 128 129 return os.RemoveAll(t.SrcDir) 130 } 131 132 // move finds, renames and moves the combine, base and db files, ensuring that 133 // their permissions match those of our destDir. 134 func (t *Tidy) move() error { 135 for inSuffix, outSuffix := range t.CombineFileSuffixes { 136 if err := t.findAndMoveOutputs(inSuffix, outSuffix); err != nil { 137 return err 138 } 139 } 140 141 for inSuffix, outSuffix := range t.DBFileSuffixes { 142 if err := t.findAndMoveDBs(inSuffix, outSuffix); err != nil { 143 return err 144 } 145 } 146 147 for inSuffix, outSuffix := range t.BaseFileSuffixes { 148 if err := t.moveBaseDirsFile(inSuffix, outSuffix); err != nil { 149 return err 150 } 151 } 152 153 return nil 154 } 155 156 // findAndMoveOutputs finds output files in the given sourceDir with given 157 // suffix and moves them to our destDir, including date in the name, and adjusts 158 // ownership and permissions to match the destDir. 
159 func (t *Tidy) findAndMoveOutputs(inSuffix, outSuffix string) error { 160 outputPaths, err := filepath.Glob(fmt.Sprintf(t.CombineFileGlobPattern, t.SrcDir, inSuffix)) 161 if err != nil { 162 return err 163 } 164 165 if len(outputPaths) == 0 { 166 return ErrNoOutputsFound 167 } 168 169 for _, path := range outputPaths { 170 err := t.moveOutput(path, outSuffix) 171 if err != nil { 172 return err 173 } 174 } 175 176 return nil 177 } 178 179 // moveOutput moves an output file to our desrDir and changes its name to the 180 // correct format, then adjusts ownership and permissions to match the destDir. 181 func (t *Tidy) moveOutput(source string, suffix string) error { 182 interestUniqueDir := filepath.Dir(source) 183 interestBaseDir := filepath.Dir(interestUniqueDir) 184 multiUniqueDir := filepath.Dir(interestBaseDir) 185 dest := filepath.Join(t.DestDir, fmt.Sprintf("%s_%s.%s.%s.%s", 186 t.Date, 187 filepath.Base(interestBaseDir), 188 filepath.Base(interestUniqueDir), 189 filepath.Base(multiUniqueDir), 190 suffix)) 191 192 return t.renameAndCorrectPerms(source, dest) 193 } 194 195 // renameAndCorrectPerms tries 2 ways to rename the file (resorting to a copy if 196 // this is across filesystem boundaries), then matches the dest file permissions 197 // to those of our FileInfo. 198 // 199 // If source doesn't exist, but dest does, assumes the rename was done 200 // previously and just tries to match the permissions. 
201 func (t *Tidy) renameAndCorrectPerms(source, dest string) error { 202 if _, err := os.Stat(source); errors.Is(err, os.ErrNotExist) { 203 if _, err = os.Stat(dest); err == nil { 204 return CorrectPerms(dest, t.destDirInfo) 205 } 206 } 207 208 err := os.Rename(source, dest) 209 if err != nil { 210 if err = shutil.CopyFile(source, dest, false); err != nil { 211 return err 212 } 213 } 214 215 return CorrectPerms(dest, t.destDirInfo) 216 } 217 218 // CorrectPerms checks whether the given file has the same ownership and 219 // read-write permissions as the given destDir info. If permissions do not 220 // match, they will be changed accordingly. 221 func CorrectPerms(path string, destDirInfo fs.FileInfo) error { 222 current, err := os.Stat(path) 223 if err != nil { 224 return err 225 } 226 227 if err = matchOwnership(path, current, destDirInfo); err != nil { 228 return err 229 } 230 231 return matchReadWrite(path, current, destDirInfo) 232 } 233 234 // ownershipMatches checks whether the given file with the current fileinfo has 235 // the same user and group ownership as the desired fileinfo. If the user and 236 // group ownerships do not match, they will be changed accordingly. 237 func matchOwnership(path string, current, desired fs.FileInfo) error { 238 uid, gid := getUIDAndGID(current) 239 desiredUID, desiredGID := getUIDAndGID(desired) 240 241 if uid == desiredUID && gid == desiredGID { 242 return nil 243 } 244 245 return os.Lchown(path, desiredUID, desiredGID) 246 } 247 248 // getUIDAndGID extracts the UID and GID from a FileInfo. NB: this will only 249 // work on linux. 250 func getUIDAndGID(info fs.FileInfo) (int, int) { 251 return int(info.Sys().(*syscall.Stat_t).Uid), int(info.Sys().(*syscall.Stat_t).Gid) //nolint:forcetypeassert 252 } 253 254 // matchReadWrite checks whether the given file with the current fileinfo has 255 // the same user, group, other read&write permissions as our destDir. If they do 256 // not match they will be changed accordingly. 
257 func matchReadWrite(path string, current, destDirInfo fs.FileInfo) error { 258 currentMode := current.Mode() 259 currentRW := currentMode & modeRW 260 desiredRW := destDirInfo.Mode() & modeRW 261 262 if currentRW == desiredRW { 263 return nil 264 } 265 266 return os.Chmod(path, currentMode|desiredRW) 267 } 268 269 // moveBaseDirsFile moves the base.dirs file in sourceDir to a uniquely named 270 // .basedirs file in destDir that includes our date. 271 func (t *Tidy) moveBaseDirsFile(inSuffix, outSuffix string) error { 272 source := filepath.Join(t.SrcDir, inSuffix) 273 274 dest := filepath.Join(t.DestDir, fmt.Sprintf("%s_%s.%s", 275 t.Date, 276 filepath.Base(t.SrcDir), 277 outSuffix)) 278 279 return t.renameAndCorrectPerms(source, dest) 280 } 281 282 // findAndMoveDBs finds the combine.dgut.db directories in our sourceDir and 283 // moves them to a uniquely named dir in destDir that includes our date, and 284 // adjusts ownership and permissions to match our destDir. 285 // 286 // It also touches a file that 'wrstat server' monitors to know when to reload 287 // its database files. It gives that file an mtime corresponding to the oldest 288 // mtime of the walk log files. 289 func (t *Tidy) findAndMoveDBs(inSuffix, outSuffix string) error { 290 sources, err := filepath.Glob(fmt.Sprintf(t.DBFileGlobPattern, t.SrcDir, inSuffix)) 291 if err != nil { 292 return err 293 } 294 295 dbsDir, err := t.makeDBsDir(outSuffix) 296 if err != nil { 297 return err 298 } 299 300 for i, source := range sources { 301 if _, err = os.Stat(source); err != nil { 302 return err 303 } 304 305 dest := filepath.Join(dbsDir, fmt.Sprintf("%d", i)) 306 307 err = t.renameAndCorrectPerms(source, dest) 308 if err != nil { 309 return err 310 } 311 } 312 313 err = t.matchPermsInsideDir(dbsDir) 314 if err != nil { 315 return err 316 } 317 318 return t.touchDBUpdatedFile("." 
+ outSuffix + ".updated") 319 } 320 321 // makeDBsDir makes a uniquely named directory featuring the given date to hold 322 // database files in destDir. If it already exists, does nothing. Returns the 323 // path to the database directory and any error. 324 func (t *Tidy) makeDBsDir(dgutDBsSuffix string) (string, error) { 325 dbsDir := filepath.Join(t.DestDir, fmt.Sprintf("%s_%s.%s", 326 t.Date, 327 filepath.Base(t.SrcDir), 328 dgutDBsSuffix, 329 )) 330 331 err := os.Mkdir(dbsDir, t.destDirInfo.Mode().Perm()) 332 if os.IsExist(err) { 333 err = nil 334 } 335 336 return dbsDir, err 337 } 338 339 // matchPermsInsideDir does matchPerms for all the files in the given dir 340 // recursively. 341 func (t *Tidy) matchPermsInsideDir(dir string) error { 342 return filepath.WalkDir(dir, func(path string, de fs.DirEntry, err error) error { 343 if err != nil { 344 return err 345 } 346 347 return CorrectPerms(path, t.destDirInfo) 348 }) 349 } 350 351 // touchDBUpdatedFile touches a file that the server monitors so that it knows 352 // to try and reload the databases. Matches the permissions of the touched file 353 // to the given permissions. Gives the file an mtime corresponding to the oldest 354 // mtime of walk log files. 355 func (t *Tidy) touchDBUpdatedFile(dgutDBsSentinelBasename string) error { 356 sentinel := filepath.Join(t.DestDir, dgutDBsSentinelBasename) 357 358 oldest, err := t.getOldestMtimeOfWalkFiles(t.SrcDir, ".log") 359 if err != nil { 360 return err 361 } 362 363 _, err = os.Stat(sentinel) 364 if os.IsNotExist(err) { 365 if err = createFile(sentinel); err != nil { 366 return err 367 } 368 } 369 370 if err = changeAMFileTime(sentinel, oldest); err != nil { 371 return err 372 } 373 374 return CorrectPerms(sentinel, t.destDirInfo) 375 } 376 377 // createFile creates a file in the given path. 
378 func createFile(path string) error { 379 file, err := os.Create(path) 380 if err != nil { 381 return err 382 } 383 384 file.Close() 385 386 return nil 387 } 388 389 // changeAMFileTime updates the a&m time of the given path to the given time. 390 func changeAMFileTime(path string, t time.Time) error { 391 return os.Chtimes(path, t.Local(), t.Local()) 392 } 393 394 // getOldestMtimeOfWalkFiles looks in our sourceDir for walk log files and 395 // returns the oldest mtime of them all. 396 func (t *Tidy) getOldestMtimeOfWalkFiles(dir, statLogOutputFileSuffix string) (time.Time, error) { 397 paths, err := filepath.Glob(fmt.Sprintf(t.WalkFilePathGlobPattern, dir, statLogOutputFileSuffix)) 398 if err != nil || len(paths) == 0 { 399 return time.Now(), err 400 } 401 402 oldestT := time.Now() 403 404 for _, path := range paths { 405 info, err := os.Stat(path) 406 if err != nil { 407 return time.Time{}, err 408 } 409 410 if info.ModTime().Before(oldestT) { 411 oldestT = info.ModTime() 412 } 413 } 414 415 return oldestT, nil 416 }