github.com/wtsi-ssg/wrstat/v4@v4.5.1/neaten/neaten.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2022 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   * Author: Kyle Mace <km34@sanger.ac.uk>
     6   *
     7   * Permission is hereby granted, free of charge, to any person obtaining
     8   * a copy of this software and associated documentation files (the
     9   * "Software"), to deal in the Software without restriction, including
    10   * without limitation the rights to use, copy, modify, merge, publish,
    11   * distribute, sublicense, and/or sell copies of the Software, and to
    12   * permit persons to whom the Software is furnished to do so, subject to
    13   * the following conditions:
    14   *
    15   * The above copyright notice and this permission notice shall be included
    16   * in all copies or substantial portions of the Software.
    17   *
    18   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    19   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    20   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    21   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    22   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    23   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    24   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    25   ******************************************************************************/
    26  
    27  package neaten
    28  
    29  import (
    30  	"errors"
    31  	"fmt"
    32  	"io/fs"
    33  	"os"
    34  	"path/filepath"
    35  	"syscall"
    36  	"time"
    37  
    38  	"github.com/termie/go-shutil"
    39  	fileCheck "github.com/wtsi-ssg/wrstat/v4/fs"
    40  )
    41  
    42  type Error string
    43  
    44  func (e Error) Error() string { return string(e) }
    45  
    46  const ErrNoOutputsFound = Error("There are no existing files according to the provided input and output suffixes.")
    47  
    48  // modeRW are the read-write permission bits for user, group and other.
    49  const modeRW = 0666
    50  
// Tidy describes a source directory of output files, the suffixes and glob
// patterns used to find files within it, and the destination directory they
// should end up in, so that Up() can tidy the source files into DestDir.
type Tidy struct {
	// SrcDir is the directory searched for files to move. Up() removes it after
	// a successful move unless deletion is disabled.
	SrcDir string
	// DestDir is the directory files are moved into. Up() creates it with
	// DestDirPerms if it does not exist.
	DestDir string

	// Date used in the renaming of files; it forms the leading part of every
	// destination name.
	Date string

	// File suffixes of combine files in the SrcDir (map keys), and their
	// counterpart in the DestDir (map values).
	CombineFileSuffixes map[string]string

	// File suffixes of db files in the SrcDir (map keys), and their counterpart
	// in the DestDir (map values). The value also names the ".<value>.updated"
	// sentinel file touched in DestDir.
	DBFileSuffixes map[string]string

	// File suffixes of base files in the SrcDir (map keys), and their
	// counterpart in the DestDir (map values). Each key is joined directly onto
	// SrcDir to form the source path.
	BaseFileSuffixes map[string]string

	// Glob pattern describing the path of combine files in SrcDir. It is used
	// as a fmt format string that receives SrcDir and an input suffix, in that
	// order.
	CombineFileGlobPattern string

	// Glob pattern describing the path of db files in SrcDir. It is used as a
	// fmt format string that receives SrcDir and an input suffix, in that order.
	DBFileGlobPattern string

	// Glob pattern describing the path of walk files in SrcDir. It is used as a
	// fmt format string that receives SrcDir and ".log", in that order.
	WalkFilePathGlobPattern string

	// The perms of destdir if we make the destdir ourselves.
	DestDirPerms fs.FileMode

	// destDirInfo is the stat result for DestDir, captured by Up() and used as
	// the reference for ownership and permissions of moved files.
	destDirInfo fs.FileInfo
}
    87  
    88  // Up takes our source directory of wrstat output files, renames them and
    89  // relocates them to our dest directory, using our date. Also ensures that the
    90  // permissions of wrstat output files match those of dest directory. If our dest
    91  // dir doesn't exist, it will be created. And it touches a file called
    92  // .dgut.db.updated, setting its mTime equal to the oldest of all those from our
    93  // srcDir. Finally, deletes the source directory.
    94  //
    95  // For debugging purposes, set disableDeletion to true to disable deletion of the
    96  // source directory after a successful move.
    97  func (t *Tidy) Up(disableDeletion bool) error {
    98  	if err := fileCheck.DirValid(t.SrcDir); err != nil {
    99  		return err
   100  	}
   101  
   102  	err := fileCheck.DirValid(t.DestDir)
   103  	if os.IsNotExist(err) {
   104  		err = os.MkdirAll(t.DestDir, t.DestDirPerms)
   105  		if err != nil {
   106  			return err
   107  		}
   108  	}
   109  
   110  	t.destDirInfo, err = os.Stat(t.DestDir)
   111  	if err != nil {
   112  		return err
   113  	}
   114  
   115  	return t.moveAndDelete(disableDeletion)
   116  }
   117  
   118  // moveAndDelete does the main work of this package: move various files to our
   119  // destDir, then delete our SrcDir if disableDeletion is false.
   120  func (t *Tidy) moveAndDelete(disableDeletion bool) error {
   121  	if err := t.move(); err != nil {
   122  		return err
   123  	}
   124  
   125  	if disableDeletion {
   126  		return t.matchPermsInsideDir(t.SrcDir)
   127  	}
   128  
   129  	return os.RemoveAll(t.SrcDir)
   130  }
   131  
   132  // move finds, renames and moves the combine, base and db files, ensuring that
   133  // their permissions match those of our destDir.
   134  func (t *Tidy) move() error {
   135  	for inSuffix, outSuffix := range t.CombineFileSuffixes {
   136  		if err := t.findAndMoveOutputs(inSuffix, outSuffix); err != nil {
   137  			return err
   138  		}
   139  	}
   140  
   141  	for inSuffix, outSuffix := range t.DBFileSuffixes {
   142  		if err := t.findAndMoveDBs(inSuffix, outSuffix); err != nil {
   143  			return err
   144  		}
   145  	}
   146  
   147  	for inSuffix, outSuffix := range t.BaseFileSuffixes {
   148  		if err := t.moveBaseDirsFile(inSuffix, outSuffix); err != nil {
   149  			return err
   150  		}
   151  	}
   152  
   153  	return nil
   154  }
   155  
   156  // findAndMoveOutputs finds output files in the given sourceDir with given
   157  // suffix and moves them to our destDir, including date in the name, and adjusts
   158  // ownership and permissions to match the destDir.
   159  func (t *Tidy) findAndMoveOutputs(inSuffix, outSuffix string) error {
   160  	outputPaths, err := filepath.Glob(fmt.Sprintf(t.CombineFileGlobPattern, t.SrcDir, inSuffix))
   161  	if err != nil {
   162  		return err
   163  	}
   164  
   165  	if len(outputPaths) == 0 {
   166  		return ErrNoOutputsFound
   167  	}
   168  
   169  	for _, path := range outputPaths {
   170  		err := t.moveOutput(path, outSuffix)
   171  		if err != nil {
   172  			return err
   173  		}
   174  	}
   175  
   176  	return nil
   177  }
   178  
   179  // moveOutput moves an output file to our desrDir and changes its name to the
   180  // correct format, then adjusts ownership and permissions to match the destDir.
   181  func (t *Tidy) moveOutput(source string, suffix string) error {
   182  	interestUniqueDir := filepath.Dir(source)
   183  	interestBaseDir := filepath.Dir(interestUniqueDir)
   184  	multiUniqueDir := filepath.Dir(interestBaseDir)
   185  	dest := filepath.Join(t.DestDir, fmt.Sprintf("%s_%s.%s.%s.%s",
   186  		t.Date,
   187  		filepath.Base(interestBaseDir),
   188  		filepath.Base(interestUniqueDir),
   189  		filepath.Base(multiUniqueDir),
   190  		suffix))
   191  
   192  	return t.renameAndCorrectPerms(source, dest)
   193  }
   194  
   195  // renameAndCorrectPerms tries 2 ways to rename the file (resorting to a copy if
   196  // this is across filesystem boundaries), then matches the dest file permissions
   197  // to those of our FileInfo.
   198  //
   199  // If source doesn't exist, but dest does, assumes the rename was done
   200  // previously and just tries to match the permissions.
   201  func (t *Tidy) renameAndCorrectPerms(source, dest string) error {
   202  	if _, err := os.Stat(source); errors.Is(err, os.ErrNotExist) {
   203  		if _, err = os.Stat(dest); err == nil {
   204  			return CorrectPerms(dest, t.destDirInfo)
   205  		}
   206  	}
   207  
   208  	err := os.Rename(source, dest)
   209  	if err != nil {
   210  		if err = shutil.CopyFile(source, dest, false); err != nil {
   211  			return err
   212  		}
   213  	}
   214  
   215  	return CorrectPerms(dest, t.destDirInfo)
   216  }
   217  
   218  // CorrectPerms checks whether the given file has the same ownership and
   219  // read-write permissions as the given destDir info. If permissions do not
   220  // match, they will be changed accordingly.
   221  func CorrectPerms(path string, destDirInfo fs.FileInfo) error {
   222  	current, err := os.Stat(path)
   223  	if err != nil {
   224  		return err
   225  	}
   226  
   227  	if err = matchOwnership(path, current, destDirInfo); err != nil {
   228  		return err
   229  	}
   230  
   231  	return matchReadWrite(path, current, destDirInfo)
   232  }
   233  
   234  // ownershipMatches checks whether the given file with the current fileinfo has
   235  // the same user and group ownership as the desired fileinfo. If the user and
   236  // group ownerships do not match, they will be changed accordingly.
   237  func matchOwnership(path string, current, desired fs.FileInfo) error {
   238  	uid, gid := getUIDAndGID(current)
   239  	desiredUID, desiredGID := getUIDAndGID(desired)
   240  
   241  	if uid == desiredUID && gid == desiredGID {
   242  		return nil
   243  	}
   244  
   245  	return os.Lchown(path, desiredUID, desiredGID)
   246  }
   247  
   248  // getUIDAndGID extracts the UID and GID from a FileInfo. NB: this will only
   249  // work on linux.
   250  func getUIDAndGID(info fs.FileInfo) (int, int) {
   251  	return int(info.Sys().(*syscall.Stat_t).Uid), int(info.Sys().(*syscall.Stat_t).Gid) //nolint:forcetypeassert
   252  }
   253  
   254  // matchReadWrite checks whether the given file with the current fileinfo has
   255  // the same user, group, other read&write permissions as our destDir. If they do
   256  // not match they will be changed accordingly.
   257  func matchReadWrite(path string, current, destDirInfo fs.FileInfo) error {
   258  	currentMode := current.Mode()
   259  	currentRW := currentMode & modeRW
   260  	desiredRW := destDirInfo.Mode() & modeRW
   261  
   262  	if currentRW == desiredRW {
   263  		return nil
   264  	}
   265  
   266  	return os.Chmod(path, currentMode|desiredRW)
   267  }
   268  
   269  // moveBaseDirsFile moves the base.dirs file in sourceDir to a uniquely named
   270  // .basedirs file in destDir that includes our date.
   271  func (t *Tidy) moveBaseDirsFile(inSuffix, outSuffix string) error {
   272  	source := filepath.Join(t.SrcDir, inSuffix)
   273  
   274  	dest := filepath.Join(t.DestDir, fmt.Sprintf("%s_%s.%s",
   275  		t.Date,
   276  		filepath.Base(t.SrcDir),
   277  		outSuffix))
   278  
   279  	return t.renameAndCorrectPerms(source, dest)
   280  }
   281  
   282  // findAndMoveDBs finds the combine.dgut.db directories in our sourceDir and
   283  // moves them to a uniquely named dir in destDir that includes our date, and
   284  // adjusts ownership and permissions to match our destDir.
   285  //
   286  // It also touches a file that 'wrstat server' monitors to know when to reload
   287  // its database files. It gives that file an mtime corresponding to the oldest
   288  // mtime of the walk log files.
   289  func (t *Tidy) findAndMoveDBs(inSuffix, outSuffix string) error {
   290  	sources, err := filepath.Glob(fmt.Sprintf(t.DBFileGlobPattern, t.SrcDir, inSuffix))
   291  	if err != nil {
   292  		return err
   293  	}
   294  
   295  	dbsDir, err := t.makeDBsDir(outSuffix)
   296  	if err != nil {
   297  		return err
   298  	}
   299  
   300  	for i, source := range sources {
   301  		if _, err = os.Stat(source); err != nil {
   302  			return err
   303  		}
   304  
   305  		dest := filepath.Join(dbsDir, fmt.Sprintf("%d", i))
   306  
   307  		err = t.renameAndCorrectPerms(source, dest)
   308  		if err != nil {
   309  			return err
   310  		}
   311  	}
   312  
   313  	err = t.matchPermsInsideDir(dbsDir)
   314  	if err != nil {
   315  		return err
   316  	}
   317  
   318  	return t.touchDBUpdatedFile("." + outSuffix + ".updated")
   319  }
   320  
   321  // makeDBsDir makes a uniquely named directory featuring the given date to hold
   322  // database files in destDir. If it already exists, does nothing. Returns the
   323  // path to the database directory and any error.
   324  func (t *Tidy) makeDBsDir(dgutDBsSuffix string) (string, error) {
   325  	dbsDir := filepath.Join(t.DestDir, fmt.Sprintf("%s_%s.%s",
   326  		t.Date,
   327  		filepath.Base(t.SrcDir),
   328  		dgutDBsSuffix,
   329  	))
   330  
   331  	err := os.Mkdir(dbsDir, t.destDirInfo.Mode().Perm())
   332  	if os.IsExist(err) {
   333  		err = nil
   334  	}
   335  
   336  	return dbsDir, err
   337  }
   338  
   339  // matchPermsInsideDir does matchPerms for all the files in the given dir
   340  // recursively.
   341  func (t *Tidy) matchPermsInsideDir(dir string) error {
   342  	return filepath.WalkDir(dir, func(path string, de fs.DirEntry, err error) error {
   343  		if err != nil {
   344  			return err
   345  		}
   346  
   347  		return CorrectPerms(path, t.destDirInfo)
   348  	})
   349  }
   350  
   351  // touchDBUpdatedFile touches a file that the server monitors so that it knows
   352  // to try and reload the databases. Matches the permissions of the touched file
   353  // to the given permissions. Gives the file an mtime corresponding to the oldest
   354  // mtime of walk log files.
   355  func (t *Tidy) touchDBUpdatedFile(dgutDBsSentinelBasename string) error {
   356  	sentinel := filepath.Join(t.DestDir, dgutDBsSentinelBasename)
   357  
   358  	oldest, err := t.getOldestMtimeOfWalkFiles(t.SrcDir, ".log")
   359  	if err != nil {
   360  		return err
   361  	}
   362  
   363  	_, err = os.Stat(sentinel)
   364  	if os.IsNotExist(err) {
   365  		if err = createFile(sentinel); err != nil {
   366  			return err
   367  		}
   368  	}
   369  
   370  	if err = changeAMFileTime(sentinel, oldest); err != nil {
   371  		return err
   372  	}
   373  
   374  	return CorrectPerms(sentinel, t.destDirInfo)
   375  }
   376  
   377  // createFile creates a file in the given path.
   378  func createFile(path string) error {
   379  	file, err := os.Create(path)
   380  	if err != nil {
   381  		return err
   382  	}
   383  
   384  	file.Close()
   385  
   386  	return nil
   387  }
   388  
   389  // changeAMFileTime updates the a&m time of the given path to the given time.
   390  func changeAMFileTime(path string, t time.Time) error {
   391  	return os.Chtimes(path, t.Local(), t.Local())
   392  }
   393  
   394  // getOldestMtimeOfWalkFiles looks in our sourceDir for walk log files and
   395  // returns the oldest mtime of them all.
   396  func (t *Tidy) getOldestMtimeOfWalkFiles(dir, statLogOutputFileSuffix string) (time.Time, error) {
   397  	paths, err := filepath.Glob(fmt.Sprintf(t.WalkFilePathGlobPattern, dir, statLogOutputFileSuffix))
   398  	if err != nil || len(paths) == 0 {
   399  		return time.Now(), err
   400  	}
   401  
   402  	oldestT := time.Now()
   403  
   404  	for _, path := range paths {
   405  		info, err := os.Stat(path)
   406  		if err != nil {
   407  			return time.Time{}, err
   408  		}
   409  
   410  		if info.ModTime().Before(oldestT) {
   411  			oldestT = info.ModTime()
   412  		}
   413  	}
   414  
   415  	return oldestT, nil
   416  }
   417  
   418  // Touch modifies path's a and mtime to the current time.
   419  func Touch(path string) error {
   420  	now := time.Now().Local()
   421  
   422  	return changeAMFileTime(path, now)
   423  }
   424  
   425  // DeleteAllPrefixedDirEntries deletes all files and directories in the given
   426  // directory that have the given prefix.
   427  func DeleteAllPrefixedDirEntries(dir, prefix string) error {
   428  	paths, err := filepath.Glob(fmt.Sprintf("%s/%s*", dir, prefix))
   429  	if err != nil {
   430  		return err
   431  	}
   432  
   433  	for _, path := range paths {
   434  		err = os.RemoveAll(path)
   435  		if err != nil && !os.IsNotExist(err) {
   436  			return err
   437  		}
   438  	}
   439  
   440  	return nil
   441  }