github.com/wtsi-ssg/wrstat/v4@v4.5.1/cmd/basedir.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2022, 2023 Genome Research Ltd. 3 * 4 * Authors: 5 * Sendu Bala <sb10@sanger.ac.uk> 6 * Michael Woolnough <mw31@sanger.ac.uk> 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining 9 * a copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sublicense, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * The above copyright notice and this permission notice shall be included 17 * in all copies or substantial portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 ******************************************************************************/ 27 28 package cmd 29 30 import ( 31 "errors" 32 "fmt" 33 "io" 34 "os" 35 "path/filepath" 36 "time" 37 38 "github.com/spf13/cobra" 39 "github.com/wtsi-ssg/wrstat/v4/basedirs" 40 "github.com/wtsi-ssg/wrstat/v4/dgut" 41 ifs "github.com/wtsi-ssg/wrstat/v4/internal/fs" 42 ) 43 44 const ( 45 basedirBasename = "basedirs.db" 46 basedirSplits = 4 47 basedirMinDirs = 4 48 basedirMinDirsHumgen = basedirMinDirs + 1 49 basedirMinDirsMDTExtra = 1 50 groupUsageBasename = "basedirs.groupusage.tsv" 51 userUsageBasename = "basedirs.userusage.tsv" 52 numBasedirArgs = 2 53 ) 54 55 // options for this cmd. 56 var quotaPath string 57 var ownersPath string 58 59 // basedirCmd represents the basedir command. 60 var basedirCmd = &cobra.Command{ 61 Use: "basedir", 62 Short: "Create a database that summarises disk usage by unix group and base directory.", 63 Long: `Create a database that summarises disk usage by unix group and base directory. 64 65 Provide the unique subdirectory of your 'wrstat multi -w' directory as an unamed 66 argument to this command, along with the multi -f directory used for the last 67 time this was run (or the current -f directory for a first run). 68 69 You must also provide a csv file of gid,disk,size_quota,inode_quota via the 70 --quota option (where size_quota is the maximum disk usage allowed for that 71 group on that disk in bytes, and inode_quota is the maximum number of file they 72 can have). 73 74 You must also provide a csv file of gid,owner_name via the --owners option. 75 76 This is called by 'wrstat multi' after the combine step has completed. It does 77 some 'wrstat where'-type calls for every unix group to come up with hopefully 78 meaningful and useful "base directories" for every group and ever user. 79 80 Unlike the real 'wrstat where', this is not restricted by authorization and 81 directly accesses the database files to see all data. 82 83 A base directory is a directory where all a group/user's data lies nested 84 within. 85 86 Since a group/user could have files in multiple mount points mounted at /, the 87 true base directory would likely always be '/', which wouldn't be useful. 88 Instead, a 'wrstat where' split of 4 is used, and only paths consisting of at 89 least 4 sub directories are returned. Paths that are subdirectories of other 90 results are ignored. As a special case, if a path contains 'mdt[n]' as a 91 directory, where n is a number, then 5 sub directories are required. 92 93 Disk usage summaries are stored in database keyed on the group/user and base 94 directories. The summaries include quota information for groups, taking 95 that information from the given --quota file. Eg. if the csv has the line: 96 foo,/mount/a,1024 97 Then the summary of group foo's data in a base directory /mount/a/groups/foo 98 would say the quota for that location was 1KB. 99 The summaries also include the owner of each group, taken from the --ownersfile. 100 101 The output is a database named 'basedirs.db' in the given directory. If the file 102 already exists, that database will be updated with the latest summary 103 information. 104 105 In addition to the database file, it also outputs basedirs.groupusage.tsv which 106 is a tsv file with these columns: 107 group_name 108 owner_name 109 directory_path 110 last_modified (number of days ago) 111 used size (used bytes) 112 quota size (maximum allowed bytes) 113 used inodes (number of files) 114 quota inodes (maximum allowed number of bytes) 115 warning ("OK" or "Not OK" if quota is estimated to have run out in 3 days) 116 117 There's also a similar basedirs.userusage.tsv file with the same columns (but 118 quota will always be 0, warning will always be "OK", owner_name will always 119 be blank), and the first column will be user_name instead of group_name. 120 `, 121 Run: func(cmd *cobra.Command, args []string) { 122 if len(args) != numBasedirArgs { 123 die("you must supply the path to your unique subdir of your 'wrstat multi -w' working directory, " + 124 "and the multi -f output directory") 125 } 126 127 if quotaPath == "" { 128 die("you must supply --quota") 129 } 130 131 if ownersPath == "" { 132 die("you must supply --owners") 133 } 134 135 quotas, err := basedirs.ParseQuotas(quotaPath) 136 if err != nil { 137 die("failed to parse quota information: %s", err) 138 } 139 140 t := time.Now() 141 tree, err := dgut.NewTree(dgutDBCombinePaths(args[0])...) 142 if err != nil { 143 die("failed to load dgut databases: %s", err) 144 } 145 info("opening databases took %s", time.Since(t)) 146 147 dbPath := filepath.Join(args[0], basedirBasename) 148 149 if err = copyExistingBaseDirsDB(args[1], dbPath); err != nil { 150 die("failed to get existing base directories database: %s", err) 151 } 152 153 bd, err := basedirs.NewCreator(dbPath, tree, quotas) 154 if err != nil { 155 die("failed to create base directories database: %s", err) 156 } 157 158 t = time.Now() 159 err = bd.CreateDatabase(time.Now()) 160 if err != nil { 161 die("failed to create base directories database: %s", err) 162 } 163 164 info("creating base dirs took %s", time.Since(t)) 165 166 t = time.Now() 167 bdr, err := basedirs.NewReader(dbPath, ownersPath) 168 if err != nil { 169 die("failed to create base directories database: %s", err) 170 } 171 172 gut, err := bdr.GroupUsageTable() 173 if err != nil { 174 die("failed to get group usage table: %s", err) 175 } 176 177 if err = writeFile(filepath.Join(args[0], groupUsageBasename), gut); err != nil { 178 die("failed to write group usage table: %s", err) 179 } 180 181 uut, err := bdr.UserUsageTable() 182 if err != nil { 183 die("failed to get group usage table: %s", err) 184 } 185 186 if err = writeFile(filepath.Join(args[0], userUsageBasename), uut); err != nil { 187 die("failed to write group usage table: %s", err) 188 } 189 190 if err = bdr.Close(); err != nil { 191 die("failed to close basedirs database reader: %s", err) 192 } 193 194 info("reading base dirs took %s", time.Since(t)) 195 }, 196 } 197 198 func init() { 199 RootCmd.AddCommand(basedirCmd) 200 201 // flags specific to this sub-command 202 basedirCmd.Flags().StringVarP("aPath, "quota", "q", "", "gid,disk,size_quota,inode_quota csv file") 203 basedirCmd.Flags().StringVarP(&ownersPath, "owners", "o", "", "gid,owner csv file") 204 } 205 206 // dgutDBCombinePaths returns the dgut db directories that 'wrstat combine' 207 // creates in the given output directory. 208 func dgutDBCombinePaths(dir string) []string { 209 paths, err := filepath.Glob(fmt.Sprintf("%s/*/*/%s", dir, combineDGUTOutputFileBasename)) 210 if err != nil || len(paths) == 0 { 211 die("failed to find dgut database directories based on [%s/*/*/%s] (err: %s)", 212 dir, combineDGUTOutputFileBasename, err) 213 } 214 215 info("%+v", paths) 216 217 return paths 218 } 219 220 func copyExistingBaseDirsDB(existingDir, newDBPath string) error { 221 existingDBPath, err := ifs.FindLatestDirectoryEntry(existingDir, basedirBasename) 222 if err != nil && !errors.Is(err, ifs.ErrNoDirEntryFound) { 223 return err 224 } 225 226 if existingDBPath == "" { 227 return nil 228 } 229 230 fr, err := os.Open(existingDBPath) 231 if err != nil { 232 return err 233 } 234 235 defer fr.Close() 236 237 fw, err := os.Create(newDBPath) 238 if err != nil { 239 return err 240 } 241 242 _, err = io.Copy(fw, fr) 243 244 errc := fw.Close() 245 if err == nil { 246 err = errc 247 } 248 249 return err 250 } 251 252 func writeFile(path, contents string) error { 253 f, err := os.Create(path) 254 if err != nil { 255 return err 256 } 257 258 if _, err := io.WriteString(f, contents); err != nil { 259 return err 260 } 261 262 return f.Close() 263 }