github.com/wtsi-ssg/wrstat/v4@v4.5.1/cmd/basedir.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2022, 2023 Genome Research Ltd.
     3   *
     4   * Authors:
     5   *   Sendu Bala <sb10@sanger.ac.uk>
     6   *   Michael Woolnough <mw31@sanger.ac.uk>
     7   *
     8   * Permission is hereby granted, free of charge, to any person obtaining
     9   * a copy of this software and associated documentation files (the
    10   * "Software"), to deal in the Software without restriction, including
    11   * without limitation the rights to use, copy, modify, merge, publish,
    12   * distribute, sublicense, and/or sell copies of the Software, and to
    13   * permit persons to whom the Software is furnished to do so, subject to
    14   * the following conditions:
    15   *
    16   * The above copyright notice and this permission notice shall be included
    17   * in all copies or substantial portions of the Software.
    18   *
    19   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    20   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    21   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    22   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    23   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    24   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    25   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    26   ******************************************************************************/
    27  
    28  package cmd
    29  
    30  import (
    31  	"errors"
    32  	"fmt"
    33  	"io"
    34  	"os"
    35  	"path/filepath"
    36  	"time"
    37  
    38  	"github.com/spf13/cobra"
    39  	"github.com/wtsi-ssg/wrstat/v4/basedirs"
    40  	"github.com/wtsi-ssg/wrstat/v4/dgut"
    41  	ifs "github.com/wtsi-ssg/wrstat/v4/internal/fs"
    42  )
    43  
// Constants used by the basedir sub-command.
const (
	// basedirBasename is the name of the database file created inside the
	// working directory given as the first argument.
	basedirBasename        = "basedirs.db"
	// The following four values are not referenced in this file. They
	// presumably correspond to the 'wrstat where' split of 4, the minimum of 4
	// sub directories, and the extra directory required for 'mdt[n]' paths
	// described in the command's help text -- TODO confirm against the
	// basedirs package.
	basedirSplits          = 4
	basedirMinDirs         = 4
	basedirMinDirsHumgen   = basedirMinDirs + 1
	basedirMinDirsMDTExtra = 1
	// groupUsageBasename and userUsageBasename are the names of the tsv files
	// written next to the database, holding the group and user usage tables.
	groupUsageBasename     = "basedirs.groupusage.tsv"
	userUsageBasename      = "basedirs.userusage.tsv"
	// numBasedirArgs is the exact number of positional arguments the command
	// requires (the working sub-directory and the multi -f output directory).
	numBasedirArgs         = 2
)
    54  
    55  // options for this cmd.
    56  var quotaPath string
    57  var ownersPath string
    58  
    59  // basedirCmd represents the basedir command.
    60  var basedirCmd = &cobra.Command{
    61  	Use:   "basedir",
    62  	Short: "Create a database that summarises disk usage by unix group and base directory.",
    63  	Long: `Create a database that summarises disk usage by unix group and base directory.
    64  
    65  Provide the unique subdirectory of your 'wrstat multi -w' directory as an unamed
    66  argument to this command, along with the multi -f directory used for the last
    67  time this was run (or the current -f directory for a first run).
    68  
    69  You must also provide a csv file of gid,disk,size_quota,inode_quota via the
    70  --quota option (where size_quota is the maximum disk usage allowed for that
    71  group on that disk in bytes, and inode_quota is the maximum number of file they
    72  can have).
    73  
    74  You must also provide a csv file of gid,owner_name via the --owners option.
    75  
    76  This is called by 'wrstat multi' after the combine step has completed. It does
    77  some 'wrstat where'-type calls for every unix group to come up with hopefully
    78  meaningful and useful "base directories" for every group and ever user.
    79  
    80  Unlike the real 'wrstat where', this is not restricted by authorization and
    81  directly accesses the database files to see all data.
    82  
    83  A base directory is a directory where all a group/user's data lies nested
    84  within.
    85  
    86  Since a group/user could have files in multiple mount points mounted at /, the
    87  true base directory would likely always be '/', which wouldn't be useful.
    88  Instead, a 'wrstat where' split of 4 is used, and only paths consisting of at
    89  least 4 sub directories are returned. Paths that are subdirectories of other
    90  results are ignored. As a special case, if a path contains 'mdt[n]' as a
    91  directory, where n is a number, then 5 sub directories are required.
    92  
    93  Disk usage summaries are stored in database keyed on the group/user and base
    94  directories. The summaries include quota information for groups, taking
    95  that information from the given --quota file. Eg. if the csv has the line:
    96  foo,/mount/a,1024
    97  Then the summary of group foo's data in a base directory /mount/a/groups/foo
    98  would say the quota for that location was 1KB.
    99  The summaries also include the owner of each group, taken from the --ownersfile.
   100  
   101  The output is a database named 'basedirs.db' in the given directory. If the file
   102  already exists, that database will be updated with the latest summary
   103  information.
   104  
   105  In addition to the database file, it also outputs basedirs.groupusage.tsv which
   106  is a tsv file with these columns:
   107  group_name
   108  owner_name
   109  directory_path
   110  last_modified (number of days ago)
   111  used size (used bytes)
   112  quota size (maximum allowed bytes)
   113  used inodes (number of files)
   114  quota inodes (maximum allowed number of bytes)
   115  warning ("OK" or "Not OK" if quota is estimated to have run out in 3 days)
   116  
   117  There's also a similar basedirs.userusage.tsv file with the same columns (but
   118  quota will always be 0, warning will always be "OK", owner_name will always
   119  be blank), and the first column will be user_name instead of group_name.
   120  `,
   121  	Run: func(cmd *cobra.Command, args []string) {
   122  		if len(args) != numBasedirArgs {
   123  			die("you must supply the path to your unique subdir of your 'wrstat multi -w' working directory, " +
   124  				"and the multi -f output directory")
   125  		}
   126  
   127  		if quotaPath == "" {
   128  			die("you must supply --quota")
   129  		}
   130  
   131  		if ownersPath == "" {
   132  			die("you must supply --owners")
   133  		}
   134  
   135  		quotas, err := basedirs.ParseQuotas(quotaPath)
   136  		if err != nil {
   137  			die("failed to parse quota information: %s", err)
   138  		}
   139  
   140  		t := time.Now()
   141  		tree, err := dgut.NewTree(dgutDBCombinePaths(args[0])...)
   142  		if err != nil {
   143  			die("failed to load dgut databases: %s", err)
   144  		}
   145  		info("opening databases took %s", time.Since(t))
   146  
   147  		dbPath := filepath.Join(args[0], basedirBasename)
   148  
   149  		if err = copyExistingBaseDirsDB(args[1], dbPath); err != nil {
   150  			die("failed to get existing base directories database: %s", err)
   151  		}
   152  
   153  		bd, err := basedirs.NewCreator(dbPath, tree, quotas)
   154  		if err != nil {
   155  			die("failed to create base directories database: %s", err)
   156  		}
   157  
   158  		t = time.Now()
   159  		err = bd.CreateDatabase(time.Now())
   160  		if err != nil {
   161  			die("failed to create base directories database: %s", err)
   162  		}
   163  
   164  		info("creating base dirs took %s", time.Since(t))
   165  
   166  		t = time.Now()
   167  		bdr, err := basedirs.NewReader(dbPath, ownersPath)
   168  		if err != nil {
   169  			die("failed to create base directories database: %s", err)
   170  		}
   171  
   172  		gut, err := bdr.GroupUsageTable()
   173  		if err != nil {
   174  			die("failed to get group usage table: %s", err)
   175  		}
   176  
   177  		if err = writeFile(filepath.Join(args[0], groupUsageBasename), gut); err != nil {
   178  			die("failed to write group usage table: %s", err)
   179  		}
   180  
   181  		uut, err := bdr.UserUsageTable()
   182  		if err != nil {
   183  			die("failed to get group usage table: %s", err)
   184  		}
   185  
   186  		if err = writeFile(filepath.Join(args[0], userUsageBasename), uut); err != nil {
   187  			die("failed to write group usage table: %s", err)
   188  		}
   189  
   190  		if err = bdr.Close(); err != nil {
   191  			die("failed to close basedirs database reader: %s", err)
   192  		}
   193  
   194  		info("reading base dirs took %s", time.Since(t))
   195  	},
   196  }
   197  
   198  func init() {
   199  	RootCmd.AddCommand(basedirCmd)
   200  
   201  	// flags specific to this sub-command
   202  	basedirCmd.Flags().StringVarP(&quotaPath, "quota", "q", "", "gid,disk,size_quota,inode_quota csv file")
   203  	basedirCmd.Flags().StringVarP(&ownersPath, "owners", "o", "", "gid,owner csv file")
   204  }
   205  
   206  // dgutDBCombinePaths returns the dgut db directories that 'wrstat combine'
   207  // creates in the given output directory.
   208  func dgutDBCombinePaths(dir string) []string {
   209  	paths, err := filepath.Glob(fmt.Sprintf("%s/*/*/%s", dir, combineDGUTOutputFileBasename))
   210  	if err != nil || len(paths) == 0 {
   211  		die("failed to find dgut database directories based on [%s/*/*/%s] (err: %s)",
   212  			dir, combineDGUTOutputFileBasename, err)
   213  	}
   214  
   215  	info("%+v", paths)
   216  
   217  	return paths
   218  }
   219  
   220  func copyExistingBaseDirsDB(existingDir, newDBPath string) error {
   221  	existingDBPath, err := ifs.FindLatestDirectoryEntry(existingDir, basedirBasename)
   222  	if err != nil && !errors.Is(err, ifs.ErrNoDirEntryFound) {
   223  		return err
   224  	}
   225  
   226  	if existingDBPath == "" {
   227  		return nil
   228  	}
   229  
   230  	fr, err := os.Open(existingDBPath)
   231  	if err != nil {
   232  		return err
   233  	}
   234  
   235  	defer fr.Close()
   236  
   237  	fw, err := os.Create(newDBPath)
   238  	if err != nil {
   239  		return err
   240  	}
   241  
   242  	_, err = io.Copy(fw, fr)
   243  
   244  	errc := fw.Close()
   245  	if err == nil {
   246  		err = errc
   247  	}
   248  
   249  	return err
   250  }
   251  
   252  func writeFile(path, contents string) error {
   253  	f, err := os.Create(path)
   254  	if err != nil {
   255  		return err
   256  	}
   257  
   258  	if _, err := io.WriteString(f, contents); err != nil {
   259  		return err
   260  	}
   261  
   262  	return f.Close()
   263  }